diff mbox series

[ovs-dev,1/4] conntrack: remove nat_conn

Message ID 20201129033255.64647-2-hepeng.0320@bytedance.com
State Changes Requested
Headers show
Series [ovs-dev,1/4] conntrack: remove nat_conn | expand

Commit Message

.贺鹏 Nov. 29, 2020, 3:32 a.m. UTC
From: hepeng <hepeng.0320@bytedance.com>

Currently, when doing NAT, the userspace conntrack will use an extra
conn for the two directions in a flow. However, each conn has actually
the two keys for both orig and rev flow directions. This patch
introduces a conn_dir member in the conn and it consists of both rev and
orig cmap_node for hash lookup. This saves extra allocation for
nat_conn, and makes userspace code much cleaner.

We also introduces a conn_flags member to reduce the memory footprint of a
conn.

Signed-off-by: Peng He <hepeng.0320@bytedance.com>
---
 lib/conntrack-private.h |  20 +--
 lib/conntrack.c         | 264 ++++++++++++++++------------------------
 2 files changed, 121 insertions(+), 163 deletions(-)

Comments

0-day Robot Nov. 29, 2020, 3:59 a.m. UTC | #1
Bleep bloop.  Greetings hepeng.0320, I am a robot and I have tried out your patch.
Thanks for your contribution.

I encountered some error that I wasn't expecting.  See the details below.


checkpatch:
ERROR: Author hepeng <hepeng.0320@bytedance.com> needs to sign off.
WARNING: Unexpected sign-offs from developers who are not authors or co-authors or committers: Peng He <hepeng.0320@bytedance.com>
ERROR: Inappropriate spacing in pointer declaration
WARNING: Line lacks whitespace around operator
#669 FILE: lib/conntrack.c:2488:
        struct conn* conn;

Lines checked: 729, Warnings: 2, Errors: 2


Please check this out.  If you feel there has been an error, please email aconole@redhat.com

Thanks,
0-day Robot
Aaron Conole Feb. 24, 2021, 5:37 p.m. UTC | #2
"hepeng.0320" <hepeng.0320@bytedance.com> writes:

> From: hepeng <hepeng.0320@bytedance.com>
>
> Currently, when doing NAT, the userspace conntrack will use an extra
> conn for the two directions in a flow. However, each conn has actually
> the two keys for both orig and rev flow directions. This patch
> introduces a conn_dir member in the conn and it consists of both rev and
> orig cmap_node for hash lookup. This saves extra allocation for
> nat_conn, and makes userspace code much cleaner.

If I'm understanding this correctly, you still re-insert the conn into
the conn list?

> We also introduces a conn_flags member to reduce the memory footprint of a
> conn.

We should split this out to a separate patch.

> Signed-off-by: Peng He <hepeng.0320@bytedance.com>
> ---
>  lib/conntrack-private.h |  20 +--
>  lib/conntrack.c         | 264 ++++++++++++++++------------------------
>  2 files changed, 121 insertions(+), 163 deletions(-)
>
> diff --git a/lib/conntrack-private.h b/lib/conntrack-private.h
> index 789af82ff..6bec43d3f 100644
> --- a/lib/conntrack-private.h
> +++ b/lib/conntrack-private.h
> @@ -83,21 +83,23 @@ struct alg_exp_node {
>      bool nat_rpl_dst;
>  };
>  
> -enum OVS_PACKED_ENUM ct_conn_type {
> -    CT_CONN_TYPE_DEFAULT,
> -    CT_CONN_TYPE_UN_NAT,
> +#define CONN_FLAG_NAT_MASK 0xf
> +
> +struct conn_dir {
> +    struct cmap_node cm_node;
> +    bool orig;

This naming is confusing.  We can have 'conn->orig->orig'.  Consider
renaming one of these fields to distinguish what is going on.  I would
prefer seeing 'fwd' used (since it's the forward direction).

>  };
>  
>  struct conn {
>      /* Immutable data. */
>      struct conn_key key;
> +    struct conn_dir orig;
>      struct conn_key rev_key;
> +    struct conn_dir rev;

I think this might be better if setup like:
+enum ct_direction {
+    CT_DIRECTION_FWD,
+    CT_DIRECTION_REV,
+    CT_DIRECTIONS
+}
+
+struct conn_data {
+    struct conn_key key;
+    struct cmap_node cm_node;
 };
 
 struct conn {
-    struct conn_key key;
-    struct conn_key rev_key;
+    struct conn_data cn_data[CT_DIRECTIONS];
...

Then in the code, we can always get orig and rev information:

  conn->cn_data[CT_DIRECTION_FWD]
  conn->cn_data[CT_DIRECTION_REV]

Did I miss something?

>      struct conn_key parent_key; /* Only used for orig_tuple support. */
>      struct ovs_list exp_node;
> -    struct cmap_node cm_node;
> -    struct nat_action_info_t *nat_info;
> +    uint64_t conn_flags;

As mentioned, please separate this as a different patch.  This is new
piece of functionality.

>      char *alg;
> -    struct conn *nat_conn; /* The NAT 'conn' context, if there is one. */
>  
>      /* Mutable data. */
>      struct ovs_mutex lock; /* Guards all mutable fields. */
> @@ -117,11 +119,15 @@ struct conn {
>  
>      /* Immutable data. */
>      bool alg_related; /* True if alg data connection. */
> -    enum ct_conn_type conn_type;
>  
>      uint32_t tp_id; /* Timeout policy ID. */
>  };
>  
> +static inline struct conn * conn_from_conndir(struct conn_dir *conndir) {
> +    return conndir->orig ? CONTAINER_OF(conndir, struct conn, orig) : \
> +                           CONTAINER_OF(conndir, struct conn, rev);
> +}
> +
>  enum ct_update_res {
>      CT_UPDATE_INVALID,
>      CT_UPDATE_VALID,
> diff --git a/lib/conntrack.c b/lib/conntrack.c
> index 930ed0be6..73d3a2fb2 100644
> --- a/lib/conntrack.c
> +++ b/lib/conntrack.c
> @@ -94,7 +94,6 @@ static struct conn *new_conn(struct conntrack *ct, struct dp_packet *pkt,
>                               uint32_t tp_id);
>  static void delete_conn_cmn(struct conn *);
>  static void delete_conn(struct conn *);
> -static void delete_conn_one(struct conn *conn);
>  static enum ct_update_res conn_update(struct conntrack *ct, struct conn *conn,
>                                        struct dp_packet *pkt,
>                                        struct conn_lookup_ctx *ctx,
> @@ -108,8 +107,8 @@ static void set_label(struct dp_packet *, struct conn *,
>  static void *clean_thread_main(void *f_);
>  
>  static bool
> -nat_select_range_tuple(struct conntrack *ct, const struct conn *conn,
> -                       struct conn *nat_conn);
> +nat_select_range_tuple(struct conntrack *ct, struct conn *conn,
> +                       const struct nat_action_info_t *nat_action);
>  
>  static uint8_t
>  reverse_icmp_type(uint8_t type);
> @@ -233,6 +232,7 @@ conn_key_cmp(const struct conn_key *key1, const struct conn_key *key2)
>      return 1;
>  }
>  
> +#if 0

Please remove functions which are no longer used.

>  static void
>  ct_print_conn_info(const struct conn *c, const char *log_msg,
>                     enum vlog_level vll, bool force, bool rl_on)
> @@ -287,6 +287,7 @@ ct_print_conn_info(const struct conn *c, const char *log_msg,
>          }
>      }
>  }
> +#endif
>  
>  /* Initializes the connection tracker 'ct'.  The caller is responsible for
>   * calling 'conntrack_destroy()', when the instance is not needed anymore */
> @@ -437,7 +438,7 @@ conn_clean_cmn(struct conntrack *ct, struct conn *conn)
>      }
>  
>      uint32_t hash = conn_key_hash(&conn->key, ct->hash_basis);
> -    cmap_remove(&ct->conns, &conn->cm_node, hash);
> +    cmap_remove(&ct->conns, &conn->orig.cm_node, hash);
>  
>      struct zone_limit *zl = zone_limit_lookup(ct, conn->admit_zone);
>      if (zl && zl->czl.zone_limit_seq == conn->zone_limit_seq) {
> @@ -451,12 +452,10 @@ static void
>  conn_clean(struct conntrack *ct, struct conn *conn)
>      OVS_REQUIRES(ct->ct_lock)
>  {
> -    ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT);
> -
>      conn_clean_cmn(ct, conn);
> -    if (conn->nat_conn) {
> -        uint32_t hash = conn_key_hash(&conn->nat_conn->key, ct->hash_basis);
> -        cmap_remove(&ct->conns, &conn->nat_conn->cm_node, hash);
> +    if (conn->conn_flags & CONN_FLAG_NAT_MASK) {
> +        uint32_t hash = conn_key_hash(&conn->rev_key, ct->hash_basis);
> +        cmap_remove(&ct->conns, &conn->rev.cm_node, hash);
>      }
>      ovs_list_remove(&conn->exp_node);
>      conn->cleaned = true;
> @@ -464,19 +463,6 @@ conn_clean(struct conntrack *ct, struct conn *conn)
>      atomic_count_dec(&ct->n_conn);
>  }
>  
> -static void
> -conn_clean_one(struct conntrack *ct, struct conn *conn)
> -    OVS_REQUIRES(ct->ct_lock)
> -{
> -    conn_clean_cmn(ct, conn);
> -    if (conn->conn_type == CT_CONN_TYPE_DEFAULT) {
> -        ovs_list_remove(&conn->exp_node);
> -        conn->cleaned = true;
> -        atomic_count_dec(&ct->n_conn);
> -    }
> -    ovsrcu_postpone(delete_conn_one, conn);
> -}
> -
>  /* Destroys the connection tracker 'ct' and frees all the allocated memory.
>   * The caller of this function must already have shut down packet input
>   * and PMD threads (which would have been quiesced).  */
> @@ -488,9 +474,14 @@ conntrack_destroy(struct conntrack *ct)
>      pthread_join(ct->clean_thread, NULL);
>      latch_destroy(&ct->clean_thread_exit);
>  
> +    struct conn_dir *conndir;
>      ovs_mutex_lock(&ct->ct_lock);
> -    CMAP_FOR_EACH (conn, cm_node, &ct->conns) {
> -        conn_clean_one(ct, conn);
> +    CMAP_FOR_EACH (conndir, cm_node, &ct->conns) {
> +        if (!conndir->orig) {
> +            continue;
> +        }
> +        conn = conn_from_conndir(conndir);
> +        conn_clean(ct, conn);
>      }
>      cmap_destroy(&ct->conns);
>  
> @@ -530,9 +521,11 @@ conn_key_lookup(struct conntrack *ct, const struct conn_key *key,
>                  bool *reply)
>  {
>      struct conn *conn;
> +    struct conn_dir *conndir;
>      bool found = false;
>  
> -    CMAP_FOR_EACH_WITH_HASH (conn, cm_node, hash, &ct->conns) {
> +    CMAP_FOR_EACH_WITH_HASH (conndir, cm_node, hash, &ct->conns) {
> +        conn = conn_from_conndir(conndir);
>          if (!conn_key_cmp(&conn->key, key) && !conn_expired(conn, now)) {
>              found = true;
>              if (reply) {
> @@ -710,7 +703,7 @@ handle_alg_ctl(struct conntrack *ct, const struct conn_lookup_ctx *ctx,
>  static void
>  pat_packet(struct dp_packet *pkt, const struct conn *conn)
>  {
> -    if (conn->nat_info->nat_action & NAT_ACTION_SRC) {
> +    if (conn->conn_flags & NAT_ACTION_SRC) {
>          if (conn->key.nw_proto == IPPROTO_TCP) {
>              struct tcp_header *th = dp_packet_l4(pkt);
>              packet_set_tcp_port(pkt, conn->rev_key.dst.port, th->tcp_dst);
> @@ -718,7 +711,7 @@ pat_packet(struct dp_packet *pkt, const struct conn *conn)
>              struct udp_header *uh = dp_packet_l4(pkt);
>              packet_set_udp_port(pkt, conn->rev_key.dst.port, uh->udp_dst);
>          }
> -    } else if (conn->nat_info->nat_action & NAT_ACTION_DST) {
> +    } else if (conn->conn_flags & NAT_ACTION_DST) {
>          if (conn->key.nw_proto == IPPROTO_TCP) {
>              struct tcp_header *th = dp_packet_l4(pkt);
>              packet_set_tcp_port(pkt, th->tcp_src, conn->rev_key.src.port);
> @@ -732,7 +725,7 @@ pat_packet(struct dp_packet *pkt, const struct conn *conn)
>  static void
>  nat_packet(struct dp_packet *pkt, const struct conn *conn, bool related)
>  {
> -    if (conn->nat_info->nat_action & NAT_ACTION_SRC) {
> +    if (conn->conn_flags & NAT_ACTION_SRC) {
>          pkt->md.ct_state |= CS_SRC_NAT;
>          if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
>              struct ip_header *nh = dp_packet_l3(pkt);
> @@ -747,7 +740,7 @@ nat_packet(struct dp_packet *pkt, const struct conn *conn, bool related)
>          if (!related) {
>              pat_packet(pkt, conn);
>          }
> -    } else if (conn->nat_info->nat_action & NAT_ACTION_DST) {
> +    } else if (conn->conn_flags & NAT_ACTION_DST) {
>          pkt->md.ct_state |= CS_DST_NAT;
>          if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
>              struct ip_header *nh = dp_packet_l3(pkt);
> @@ -768,7 +761,7 @@ nat_packet(struct dp_packet *pkt, const struct conn *conn, bool related)
>  static void
>  un_pat_packet(struct dp_packet *pkt, const struct conn *conn)
>  {
> -    if (conn->nat_info->nat_action & NAT_ACTION_SRC) {
> +    if (conn->conn_flags & NAT_ACTION_SRC) {
>          if (conn->key.nw_proto == IPPROTO_TCP) {
>              struct tcp_header *th = dp_packet_l4(pkt);
>              packet_set_tcp_port(pkt, th->tcp_src, conn->key.src.port);
> @@ -776,7 +769,7 @@ un_pat_packet(struct dp_packet *pkt, const struct conn *conn)
>              struct udp_header *uh = dp_packet_l4(pkt);
>              packet_set_udp_port(pkt, uh->udp_src, conn->key.src.port);
>          }
> -    } else if (conn->nat_info->nat_action & NAT_ACTION_DST) {
> +    } else if (conn->conn_flags & NAT_ACTION_DST) {
>          if (conn->key.nw_proto == IPPROTO_TCP) {
>              struct tcp_header *th = dp_packet_l4(pkt);
>              packet_set_tcp_port(pkt, conn->key.dst.port, th->tcp_dst);
> @@ -790,7 +783,7 @@ un_pat_packet(struct dp_packet *pkt, const struct conn *conn)
>  static void
>  reverse_pat_packet(struct dp_packet *pkt, const struct conn *conn)
>  {
> -    if (conn->nat_info->nat_action & NAT_ACTION_SRC) {
> +    if (conn->conn_flags & NAT_ACTION_SRC) {
>          if (conn->key.nw_proto == IPPROTO_TCP) {
>              struct tcp_header *th_in = dp_packet_l4(pkt);
>              packet_set_tcp_port(pkt, conn->key.src.port,
> @@ -800,7 +793,7 @@ reverse_pat_packet(struct dp_packet *pkt, const struct conn *conn)
>              packet_set_udp_port(pkt, conn->key.src.port,
>                                  uh_in->udp_dst);
>          }
> -    } else if (conn->nat_info->nat_action & NAT_ACTION_DST) {
> +    } else if (conn->conn_flags & NAT_ACTION_DST) {
>          if (conn->key.nw_proto == IPPROTO_TCP) {
>              struct tcp_header *th_in = dp_packet_l4(pkt);
>              packet_set_tcp_port(pkt, th_in->tcp_src,
> @@ -834,10 +827,10 @@ reverse_nat_packet(struct dp_packet *pkt, const struct conn *conn)
>          pkt->l3_ofs += (char *) inner_l3 - (char *) nh;
>          pkt->l4_ofs += inner_l4 - (char *) icmp;
>  
> -        if (conn->nat_info->nat_action & NAT_ACTION_SRC) {
> +        if (conn->conn_flags & NAT_ACTION_SRC) {
>              packet_set_ipv4_addr(pkt, &inner_l3->ip_src,
>                                   conn->key.src.addr.ipv4);
> -        } else if (conn->nat_info->nat_action & NAT_ACTION_DST) {
> +        } else if (conn->conn_flags & NAT_ACTION_DST) {
>              packet_set_ipv4_addr(pkt, &inner_l3->ip_dst,
>                                   conn->key.dst.addr.ipv4);
>          }
> @@ -858,11 +851,11 @@ reverse_nat_packet(struct dp_packet *pkt, const struct conn *conn)
>          pkt->l3_ofs += (char *) inner_l3_6 - (char *) nh6;
>          pkt->l4_ofs += inner_l4 - (char *) icmp6;
>  
> -        if (conn->nat_info->nat_action & NAT_ACTION_SRC) {
> +        if (conn->conn_flags & NAT_ACTION_SRC) {
>              packet_set_ipv6_addr(pkt, conn->key.nw_proto,
>                                   inner_l3_6->ip6_src.be32,
>                                   &conn->key.src.addr.ipv6, true);
> -        } else if (conn->nat_info->nat_action & NAT_ACTION_DST) {
> +        } else if (conn->conn_flags & NAT_ACTION_DST) {
>              packet_set_ipv6_addr(pkt, conn->key.nw_proto,
>                                   inner_l3_6->ip6_dst.be32,
>                                   &conn->key.dst.addr.ipv6, true);
> @@ -877,10 +870,9 @@ reverse_nat_packet(struct dp_packet *pkt, const struct conn *conn)
>  }
>  
>  static void
> -un_nat_packet(struct dp_packet *pkt, const struct conn *conn,
> -              bool related)
> +un_nat_packet(struct dp_packet *pkt, const struct conn *conn, bool related)
>  {
> -    if (conn->nat_info->nat_action & NAT_ACTION_SRC) {
> +    if (conn->conn_flags & NAT_ACTION_SRC) {
>          pkt->md.ct_state |= CS_DST_NAT;
>          if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
>              struct ip_header *nh = dp_packet_l3(pkt);
> @@ -898,7 +890,7 @@ un_nat_packet(struct dp_packet *pkt, const struct conn *conn,
>          } else {
>              un_pat_packet(pkt, conn);
>          }
> -    } else if (conn->nat_info->nat_action & NAT_ACTION_DST) {
> +    } else if (conn->conn_flags & NAT_ACTION_DST) {
>          pkt->md.ct_state |= CS_SRC_NAT;
>          if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
>              struct ip_header *nh = dp_packet_l3(pkt);
> @@ -964,7 +956,6 @@ conn_not_found(struct conntrack *ct, struct dp_packet *pkt,
>      OVS_REQUIRES(ct->ct_lock)
>  {
>      struct conn *nc = NULL;
> -    struct conn *nat_conn = NULL;
>  
>      if (!valid_new(pkt, &ctx->key)) {
>          pkt->md.ct_state = CS_INVALID;
> @@ -995,6 +986,8 @@ conn_not_found(struct conntrack *ct, struct dp_packet *pkt,
>          memcpy(&nc->key, &ctx->key, sizeof nc->key);
>          memcpy(&nc->rev_key, &nc->key, sizeof nc->rev_key);
>          conn_key_reverse(&nc->rev_key);
> +        nc->orig.orig = true;
> +        nc->rev.orig = !nc->orig.orig;
>  
>          if (ct_verify_helper(helper, ct_alg_ctl)) {
>              nc->alg = nullable_xstrdup(helper);
> @@ -1008,45 +1001,29 @@ conn_not_found(struct conntrack *ct, struct dp_packet *pkt,
>          }
>  
>          if (nat_action_info) {
> -            nc->nat_info = xmemdup(nat_action_info, sizeof *nc->nat_info);
> -            nat_conn = xzalloc(sizeof *nat_conn);
> -
>              if (alg_exp) {
>                  if (alg_exp->nat_rpl_dst) {
>                      nc->rev_key.dst.addr = alg_exp->alg_nat_repl_addr;
> -                    nc->nat_info->nat_action = NAT_ACTION_SRC;
> +                    nc->conn_flags |= NAT_ACTION_SRC;
>                  } else {
>                      nc->rev_key.src.addr = alg_exp->alg_nat_repl_addr;
> -                    nc->nat_info->nat_action = NAT_ACTION_DST;
> +                    nc->conn_flags |= NAT_ACTION_DST;
>                  }
>              } else {
> -                memcpy(nat_conn, nc, sizeof *nat_conn);
> -                bool nat_res = nat_select_range_tuple(ct, nc, nat_conn);
> +                bool nat_res = nat_select_range_tuple(ct, nc, nat_action_info);
>  
>                  if (!nat_res) {
>                      goto nat_res_exhaustion;
>                  }
> -
> -                /* Update nc with nat adjustments made to nat_conn by
> -                 * nat_select_range_tuple(). */
> -                memcpy(nc, nat_conn, sizeof *nc);
>              }
>  
>              nat_packet(pkt, nc, ctx->icmp_related);
> -            memcpy(&nat_conn->key, &nc->rev_key, sizeof nat_conn->key);
> -            memcpy(&nat_conn->rev_key, &nc->key, sizeof nat_conn->rev_key);
> -            nat_conn->conn_type = CT_CONN_TYPE_UN_NAT;
> -            nat_conn->nat_info = NULL;
> -            nat_conn->alg = NULL;
> -            nat_conn->nat_conn = NULL;
> -            uint32_t nat_hash = conn_key_hash(&nat_conn->key, ct->hash_basis);
> -            cmap_insert(&ct->conns, &nat_conn->cm_node, nat_hash);
> +            uint32_t nat_hash = conn_key_hash(&nc->rev_key, ct->hash_basis);
> +            cmap_insert(&ct->conns, &nc->rev.cm_node, nat_hash);
>          }
>  
> -        nc->nat_conn = nat_conn;
>          ovs_mutex_init_adaptive(&nc->lock);
> -        nc->conn_type = CT_CONN_TYPE_DEFAULT;
> -        cmap_insert(&ct->conns, &nc->cm_node, ctx->hash);
> +        cmap_insert(&ct->conns, &nc->orig.cm_node, ctx->hash);
>          atomic_count_inc(&ct->n_conn);
>          ctx->conn = nc; /* For completeness. */
>          if (zl) {
> @@ -1066,7 +1043,6 @@ conn_not_found(struct conntrack *ct, struct dp_packet *pkt,
>       * firewall rules or a separate firewall.  Also using zone partitioning
>       * can limit DoS impact. */
>  nat_res_exhaustion:
> -    free(nat_conn);
>      ovs_list_remove(&nc->exp_node);
>      delete_conn_cmn(nc);
>      static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5);
> @@ -1080,7 +1056,6 @@ conn_update_state(struct conntrack *ct, struct dp_packet *pkt,
>                    struct conn_lookup_ctx *ctx, struct conn *conn,
>                    long long now)
>  {
> -    ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT);
>      bool create_new_conn = false;
>  
>      if (ctx->icmp_related) {
> @@ -1128,7 +1103,7 @@ static void
>  handle_nat(struct dp_packet *pkt, struct conn *conn,
>             uint16_t zone, bool reply, bool related)
>  {
> -    if (conn->nat_info &&
> +    if (conn->conn_flags & CONN_FLAG_NAT_MASK &&
>          (!(pkt->md.ct_state & (CS_SRC_NAT | CS_DST_NAT)) ||
>            (pkt->md.ct_state & (CS_SRC_NAT | CS_DST_NAT) &&
>             zone != pkt->md.ct_zone))) {
> @@ -1301,25 +1276,6 @@ process_one(struct conntrack *ct, struct dp_packet *pkt,
>          conn = NULL;
>      }
>  
> -    if (OVS_LIKELY(conn)) {
> -        if (conn->conn_type == CT_CONN_TYPE_UN_NAT) {
> -
> -            ctx->reply = true;
> -            struct conn *rev_conn = conn;  /* Save for debugging. */
> -            uint32_t hash = conn_key_hash(&conn->rev_key, ct->hash_basis);
> -            conn_key_lookup(ct, &ctx->key, hash, now, &conn, &ctx->reply);
> -
> -            if (!conn) {
> -                pkt->md.ct_state |= CS_INVALID;
> -                write_ct_md(pkt, zone, NULL, NULL, NULL);
> -                char *log_msg = xasprintf("Missing parent conn %p", rev_conn);
> -                ct_print_conn_info(rev_conn, log_msg, VLL_INFO, true, true);
> -                free(log_msg);
> -                return;
> -            }
> -        }
> -    }
> -
>      enum ct_alg_ctl_type ct_alg_ctl = get_alg_ctl_type(pkt, tp_src, tp_dst,
>                                                         helper);
>  
> @@ -2114,12 +2070,13 @@ conn_key_reverse(struct conn_key *key)
>  }
>  
>  static uint32_t
> -nat_ipv6_addrs_delta(struct in6_addr *ipv6_min, struct in6_addr *ipv6_max)
> +nat_ipv6_addrs_delta(const struct in6_addr *ipv6_min,
> +                     const struct in6_addr *ipv6_max)
>  {
> -    uint8_t *ipv6_min_hi = &ipv6_min->s6_addr[0];
> -    uint8_t *ipv6_min_lo = &ipv6_min->s6_addr[0] +  sizeof(uint64_t);
> -    uint8_t *ipv6_max_hi = &ipv6_max->s6_addr[0];
> -    uint8_t *ipv6_max_lo = &ipv6_max->s6_addr[0] + sizeof(uint64_t);
> +    const uint8_t *ipv6_min_hi = &ipv6_min->s6_addr[0];
> +    const uint8_t *ipv6_min_lo = &ipv6_min->s6_addr[0] +  sizeof(uint64_t);
> +    const uint8_t *ipv6_max_hi = &ipv6_max->s6_addr[0];
> +    const uint8_t *ipv6_max_lo = &ipv6_max->s6_addr[0] + sizeof(uint64_t);

Why these const changes now?  This should be a separate patch.

>      ovs_be64 addr6_64_min_hi;
>      ovs_be64 addr6_64_min_lo;
> @@ -2180,15 +2137,17 @@ nat_ipv6_addr_increment(struct in6_addr *ipv6, uint32_t increment)
>  }
>  
>  static uint32_t
> -nat_range_hash(const struct conn *conn, uint32_t basis)
> +nat_range_hash(const struct conn *conn,
> +               const struct nat_action_info_t *nat_info,
> +               uint32_t basis)
>  {
>      uint32_t hash = basis;
>  
> -    hash = ct_addr_hash_add(hash, &conn->nat_info->min_addr);
> -    hash = ct_addr_hash_add(hash, &conn->nat_info->max_addr);
> +    hash = ct_addr_hash_add(hash, &nat_info->min_addr);
> +    hash = ct_addr_hash_add(hash, &nat_info->max_addr);
>      hash = hash_add(hash,
> -                    (conn->nat_info->max_port << 16)
> -                    | conn->nat_info->min_port);
> +                    (nat_info->max_port << 16)
> +                    | nat_info->min_port);
>      hash = ct_endpoint_hash_add(hash, &conn->key.src);
>      hash = ct_endpoint_hash_add(hash, &conn->key.dst);
>      hash = hash_add(hash, (OVS_FORCE uint32_t) conn->key.dl_type);
> @@ -2202,8 +2161,8 @@ nat_range_hash(const struct conn *conn, uint32_t basis)
>  }
>  
>  static bool
> -nat_select_range_tuple(struct conntrack *ct, const struct conn *conn,
> -                       struct conn *nat_conn)
> +nat_select_range_tuple(struct conntrack *ct, struct conn *conn,
> +                       const struct nat_action_info_t *nat_info)
>  {
>      enum { MIN_NAT_EPHEMERAL_PORT = 1024,
>             MAX_NAT_EPHEMERAL_PORT = 65535 };
> @@ -2211,25 +2170,26 @@ nat_select_range_tuple(struct conntrack *ct, const struct conn *conn,
>      uint16_t min_port;
>      uint16_t max_port;
>      uint16_t first_port;
> -    uint32_t hash = nat_range_hash(conn, ct->hash_basis);
> +    uint32_t hash = nat_range_hash(conn, nat_info, ct->hash_basis);
>  
> -    if ((conn->nat_info->nat_action & NAT_ACTION_SRC) &&
> -        (!(conn->nat_info->nat_action & NAT_ACTION_SRC_PORT))) {
> +    if ((nat_info->nat_action & NAT_ACTION_SRC) &&
> +        (!(nat_info->nat_action & NAT_ACTION_SRC_PORT))) {
>          min_port = ntohs(conn->key.src.port);
>          max_port = ntohs(conn->key.src.port);
>          first_port = min_port;
> -    } else if ((conn->nat_info->nat_action & NAT_ACTION_DST) &&
> -               (!(conn->nat_info->nat_action & NAT_ACTION_DST_PORT))) {
> +    } else if ((nat_info->nat_action & NAT_ACTION_DST) &&
> +               (!(nat_info->nat_action & NAT_ACTION_DST_PORT))) {
>          min_port = ntohs(conn->key.dst.port);
>          max_port = ntohs(conn->key.dst.port);
>          first_port = min_port;
>      } else {
> -        uint16_t deltap = conn->nat_info->max_port - conn->nat_info->min_port;
> +        uint16_t deltap = nat_info->max_port - nat_info->min_port;
>          uint32_t port_index = hash % (deltap + 1);
> -        first_port = conn->nat_info->min_port + port_index;
> -        min_port = conn->nat_info->min_port;
> -        max_port = conn->nat_info->max_port;
> +        first_port = nat_info->min_port + port_index;
> +        min_port = nat_info->min_port;
> +        max_port = nat_info->max_port;
>      }
> +    conn->conn_flags |= nat_info->nat_action;
>  
>      uint32_t deltaa = 0;
>      uint32_t address_index;
> @@ -2237,25 +2197,25 @@ nat_select_range_tuple(struct conntrack *ct, const struct conn *conn,
>      memset(&ct_addr, 0, sizeof ct_addr);
>      union ct_addr max_ct_addr;
>      memset(&max_ct_addr, 0, sizeof max_ct_addr);
> -    max_ct_addr = conn->nat_info->max_addr;
> +    max_ct_addr = nat_info->max_addr;
>  
>      if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
> -        deltaa = ntohl(conn->nat_info->max_addr.ipv4) -
> -                 ntohl(conn->nat_info->min_addr.ipv4);
> +        deltaa = ntohl(nat_info->max_addr.ipv4) -
> +                 ntohl(nat_info->min_addr.ipv4);
>          address_index = hash % (deltaa + 1);
>          ct_addr.ipv4 = htonl(
> -            ntohl(conn->nat_info->min_addr.ipv4) + address_index);
> +            ntohl(nat_info->min_addr.ipv4) + address_index);
>      } else {
> -        deltaa = nat_ipv6_addrs_delta(&conn->nat_info->min_addr.ipv6,
> -                                      &conn->nat_info->max_addr.ipv6);
> +        deltaa = nat_ipv6_addrs_delta(&nat_info->min_addr.ipv6,
> +                                      &nat_info->max_addr.ipv6);
>          /* deltaa must be within 32 bits for full hash coverage. A 64 or
>           * 128 bit hash is unnecessary and hence not used here. Most code
>           * is kept common with V4; nat_ipv6_addrs_delta() will do the
>           * enforcement via max_ct_addr. */
> -        max_ct_addr = conn->nat_info->min_addr;
> +        max_ct_addr = nat_info->min_addr;
>          nat_ipv6_addr_increment(&max_ct_addr.ipv6, deltaa);
>          address_index = hash % (deltaa + 1);
> -        ct_addr.ipv6 = conn->nat_info->min_addr.ipv6;
> +        ct_addr.ipv6 = nat_info->min_addr.ipv6;
>          nat_ipv6_addr_increment(&ct_addr.ipv6, address_index);
>      }
>  
> @@ -2263,27 +2223,27 @@ nat_select_range_tuple(struct conntrack *ct, const struct conn *conn,
>      bool all_ports_tried = false;
>      /* For DNAT or for specified port ranges, we don't use ephemeral ports. */
>      bool ephemeral_ports_tried
> -        = conn->nat_info->nat_action & NAT_ACTION_DST ||
> -              conn->nat_info->nat_action & NAT_ACTION_SRC_PORT
> +        = nat_info->nat_action & NAT_ACTION_DST ||
> +              nat_info->nat_action & NAT_ACTION_SRC_PORT
>            ? true : false;
>      union ct_addr first_addr = ct_addr;
>      bool pat_enabled = conn->key.nw_proto != IPPROTO_ICMP &&
>                         conn->key.nw_proto != IPPROTO_ICMPV6;
>  
>      while (true) {
> -        if (conn->nat_info->nat_action & NAT_ACTION_SRC) {
> -            nat_conn->rev_key.dst.addr = ct_addr;
> +        if (nat_info->nat_action & NAT_ACTION_SRC) {
> +            conn->rev_key.dst.addr = ct_addr;
>              if (pat_enabled) {
> -                nat_conn->rev_key.dst.port = htons(port);
> +                conn->rev_key.dst.port = htons(port);
>              }
>          } else {
> -            nat_conn->rev_key.src.addr = ct_addr;
> +            conn->rev_key.src.addr = ct_addr;
>              if (pat_enabled) {
> -                nat_conn->rev_key.src.port = htons(port);
> +                conn->rev_key.src.port = htons(port);
>              }
>          }
>  
> -        bool found = conn_lookup(ct, &nat_conn->rev_key, time_msec(), NULL,
> +        bool found = conn_lookup(ct, &conn->rev_key, time_msec(), NULL,
>                                   NULL);
>          if (!found) {
>              return true;
> @@ -2306,12 +2266,12 @@ nat_select_range_tuple(struct conntrack *ct, const struct conn *conn,
>                      nat_ipv6_addr_increment(&ct_addr.ipv6, 1);
>                  }
>              } else {
> -                ct_addr = conn->nat_info->min_addr;
> +                ct_addr = nat_info->min_addr;
>              }
>              if (!memcmp(&ct_addr, &first_addr, sizeof ct_addr)) {
>                  if (pat_enabled && !ephemeral_ports_tried) {
>                      ephemeral_ports_tried = true;
> -                    ct_addr = conn->nat_info->min_addr;
> +                    ct_addr = nat_info->min_addr;
>                      first_addr = ct_addr;
>                      min_port = MIN_NAT_EPHEMERAL_PORT;
>                      max_port = MAX_NAT_EPHEMERAL_PORT;
> @@ -2324,6 +2284,7 @@ nat_select_range_tuple(struct conntrack *ct, const struct conn *conn,
>              all_ports_tried = false;
>          }
>      }
> +    conn->conn_flags &= ~CONN_FLAG_NAT_MASK;
>      return false;
>  }
>  
> @@ -2342,13 +2303,10 @@ conn_update(struct conntrack *ct, struct conn *conn, struct dp_packet *pkt,
>  static bool
>  conn_expired(struct conn *conn, long long now)
>  {
> -    if (conn->conn_type == CT_CONN_TYPE_DEFAULT) {
> -        ovs_mutex_lock(&conn->lock);
> -        bool expired = now >= conn->expiration ? true : false;
> -        ovs_mutex_unlock(&conn->lock);
> -        return expired;
> -    }
> -    return false;
> +    ovs_mutex_lock(&conn->lock);
> +    bool expired = now >= conn->expiration ? true : false;
> +    ovs_mutex_unlock(&conn->lock);
> +    return expired;
>  }
>  
>  static bool
> @@ -2367,7 +2325,6 @@ new_conn(struct conntrack *ct, struct dp_packet *pkt, struct conn_key *key,
>  static void
>  delete_conn_cmn(struct conn *conn)
>  {
> -    free(conn->nat_info);
>      free(conn->alg);
>      free(conn);
>  }
> @@ -2375,22 +2332,10 @@ delete_conn_cmn(struct conn *conn)
>  static void
>  delete_conn(struct conn *conn)
>  {
> -    ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT);
>      ovs_mutex_destroy(&conn->lock);
> -    free(conn->nat_conn);
>      delete_conn_cmn(conn);
>  }
>  
> -/* Only used by conn_clean_one(). */
> -static void
> -delete_conn_one(struct conn *conn)
> -{
> -    if (conn->conn_type == CT_CONN_TYPE_DEFAULT) {
> -        ovs_mutex_destroy(&conn->lock);
> -    }
> -    delete_conn_cmn(conn);
> -}
> -
>  /* Convert a conntrack address 'a' into an IP address 'b' based on 'dl_type'.
>   *
>   * Note that 'dl_type' should be either "ETH_TYPE_IP" or "ETH_TYPE_IPv6"
> @@ -2539,10 +2484,14 @@ conntrack_dump_next(struct conntrack_dump *dump, struct ct_dpif_entry *entry)
>          if (!cm_node) {
>              break;
>          }
> -        struct conn *conn;
> -        INIT_CONTAINER(conn, cm_node, cm_node);
> -        if ((!dump->filter_zone || conn->key.zone == dump->zone) &&
> -            (conn->conn_type != CT_CONN_TYPE_UN_NAT)) {
> +        struct conn_dir *conndir;
> +        struct conn* conn;
> +        INIT_CONTAINER(conndir, cm_node, cm_node);
> +        if (!conndir->orig) {
> +            continue;
> +        }
> +        conn = conn_from_conndir(conndir);
> +        if ((!dump->filter_zone || conn->key.zone == dump->zone)) {
>              conn_to_ct_dpif_entry(conn, entry, now);
>              return 0;
>          }
> @@ -2561,11 +2510,16 @@ int
>  conntrack_flush(struct conntrack *ct, const uint16_t *zone)
>  {
>      struct conn *conn;
> +    struct conn_dir *conndir;
>  
>      ovs_mutex_lock(&ct->ct_lock);
> -    CMAP_FOR_EACH (conn, cm_node, &ct->conns) {
> +    CMAP_FOR_EACH (conndir, cm_node, &ct->conns) {
> +        if (!conndir->orig) {
> +            continue;
> +        }
> +        conn = conn_from_conndir(conndir);
>          if (!zone || *zone == conn->key.zone) {
> -            conn_clean_one(ct, conn);
> +            conn_clean(ct, conn);
>          }
>      }
>      ovs_mutex_unlock(&ct->ct_lock);
> @@ -2586,7 +2540,7 @@ conntrack_flush_tuple(struct conntrack *ct, const struct ct_dpif_tuple *tuple,
>      ovs_mutex_lock(&ct->ct_lock);
>      conn_lookup(ct, &key, time_msec(), &conn, NULL);
>  
> -    if (conn && conn->conn_type == CT_CONN_TYPE_DEFAULT) {
> +    if (conn) {
>          conn_clean(ct, conn);
>      } else {
>          VLOG_WARN("Must flush tuple using the original pre-NATed tuple");
> @@ -2744,8 +2698,7 @@ expectation_create(struct conntrack *ct, ovs_be16 dst_port,
>          alg_exp_node->nat_rpl_dst = true;
>          if (skip_nat) {
>              alg_nat_repl_addr = dst_addr;
> -        } else if (parent_conn->nat_info &&
> -                   parent_conn->nat_info->nat_action & NAT_ACTION_DST) {
> +        } else if (parent_conn->conn_flags & NAT_ACTION_DST) {
>              alg_nat_repl_addr = parent_conn->rev_key.src.addr;
>              alg_exp_node->nat_rpl_dst = false;
>          } else {
> @@ -2757,8 +2710,7 @@ expectation_create(struct conntrack *ct, ovs_be16 dst_port,
>          alg_exp_node->nat_rpl_dst = false;
>          if (skip_nat) {
>              alg_nat_repl_addr = src_addr;
> -        } else if (parent_conn->nat_info &&
> -                   parent_conn->nat_info->nat_action & NAT_ACTION_DST) {
> +        } else if (parent_conn->conn_flags & NAT_ACTION_DST) {
>              alg_nat_repl_addr = parent_conn->key.dst.addr;
>              alg_exp_node->nat_rpl_dst = true;
>          } else {
Peng He Feb. 27, 2021, 9:49 a.m. UTC | #3
Hi,

Thanks for the detailed reviews.
These patches are like RFC to see if the work is interesting enough.

Aaron Conole <aconole@redhat.com> 于2021年2月25日周四 上午1:37写道:
>
> "hepeng.0320" <hepeng.0320@bytedance.com> writes:
>
> > From: hepeng <hepeng.0320@bytedance.com>
> >
> > Currently, when doing NAT, the userspace conntrack will use an extra
> > conn for the two directions in a flow. However, each conn has actually
> > the two keys for both orig and rev flow directions. This patch
> > introduces a conn_dir member in the conn and it consists of both rev and
> > orig cmap_node for hash lookup. This saves extra allocation for
> > nat_conn, and makes userspace code much cleaner.
>
> If I'm understanding this correctly, you still re-insert the conn into
> the conn list?


Yes. This is to insert the rev key into the cmap also. So for the nat traffic,
both orig and rev key are in the cmap.

>
> > We also introduces a conn_flags member to reduce the memory footprint of a
> > conn.
>
> We should split this out to a separate patch.
>
> > Signed-off-by: Peng He <hepeng.0320@bytedance.com>
> > ---
> >  lib/conntrack-private.h |  20 +--
> >  lib/conntrack.c         | 264 ++++++++++++++++------------------------
> >  2 files changed, 121 insertions(+), 163 deletions(-)
> >
> > diff --git a/lib/conntrack-private.h b/lib/conntrack-private.h
> > index 789af82ff..6bec43d3f 100644
> > --- a/lib/conntrack-private.h
> > +++ b/lib/conntrack-private.h
> > @@ -83,21 +83,23 @@ struct alg_exp_node {
> >      bool nat_rpl_dst;
> >  };
> >
> > -enum OVS_PACKED_ENUM ct_conn_type {
> > -    CT_CONN_TYPE_DEFAULT,
> > -    CT_CONN_TYPE_UN_NAT,
> > +#define CONN_FLAG_NAT_MASK 0xf
> > +
> > +struct conn_dir {
> > +    struct cmap_node cm_node;
> > +    bool orig;
>
> This naming is confusing.  We can have 'conn->orig->orig'.  Consider
> renaming one of these fields to distinguish what is going on.  I would
> prefer seeing 'fwd' used (since it's the forward direction).

Yes, agree.

>
> >  };
> >
> >  struct conn {
> >      /* Immutable data. */
> >      struct conn_key key;
> > +    struct conn_dir orig;
> >      struct conn_key rev_key;
> > +    struct conn_dir rev;
>
> I think this might be better if setup like:
> +enum ct_direction {
> +    CT_DIRECTION_FWD,
> +    CT_DIRECTION_REV,
> +    CT_DIRECTIONS
> +}
> +
> +struct conn_data {
> +    struct conn_key key;
> +    struct cmap_node cm_node;
>  };
>
>  struct conn {
> -    struct conn_key key;
> -    struct conn_key rev_key;
> +    struct conn_data cn_data[CT_DIRECTIONS];
> ...
>
> Then in the code, we can always get orig and rev information:
>
>   conn->cn_data[CT_DIRECTION_FWD]
>   conn->cn_data[CT_DIRECTION_REV]
>
> Did I miss something?

Since both origin and rev are in the cmap, when you lookup the hash table,
you get a pointer to the  'middle data structure', in the patch, it is
the conn_dir.

However, you are not sure what you get is the origin dir or the rev dir. The key
is to use a variable stored in the conn_dir, like 'fwd' or 'orig'.
Then you know the dir,
by knowing the dir, you know the offset between your pointer and the pointer to
the conn, just like the belowing code:

static inline struct conn * conn_from_conndir(struct conn_dir *conndir) {
    return conndir->orig ? CONTAINER_OF(conndir, struct conn, orig) : \
                           CONTAINER_OF(conndir, struct conn, rev);
}

In your case:

static inline struct conn * conn_from_conndir(struct conn_dir *conndir) {
    return conndir->orig ? CONTAINER_OF(conndir, struct conn, cn_data[FWD]) : \
                           CONTAINER_OF(conndir, struct conn, cn_data[REV]);
}

In the following patches I've changed the code of conn_dir as a part
of conn_key....



>
> >      struct conn_key parent_key; /* Only used for orig_tuple support. */
> >      struct ovs_list exp_node;
> > -    struct cmap_node cm_node;
> > -    struct nat_action_info_t *nat_info;
> > +    uint64_t conn_flags;
>
> As mentioned, please separate this as a different patch.  This is new
> piece of functionality.
>
> >      char *alg;
> > -    struct conn *nat_conn; /* The NAT 'conn' context, if there is one. */
> >
> >      /* Mutable data. */
> >      struct ovs_mutex lock; /* Guards all mutable fields. */
> > @@ -117,11 +119,15 @@ struct conn {
> >
> >      /* Immutable data. */
> >      bool alg_related; /* True if alg data connection. */
> > -    enum ct_conn_type conn_type;
> >
> >      uint32_t tp_id; /* Timeout policy ID. */
> >  };
> >
> > +static inline struct conn * conn_from_conndir(struct conn_dir *conndir) {
> > +    return conndir->orig ? CONTAINER_OF(conndir, struct conn, orig) : \
> > +                           CONTAINER_OF(conndir, struct conn, rev);
> > +}
> > +
> >  enum ct_update_res {
> >      CT_UPDATE_INVALID,
> >      CT_UPDATE_VALID,
> > diff --git a/lib/conntrack.c b/lib/conntrack.c
> > index 930ed0be6..73d3a2fb2 100644
> > --- a/lib/conntrack.c
> > +++ b/lib/conntrack.c
> > @@ -94,7 +94,6 @@ static struct conn *new_conn(struct conntrack *ct, struct dp_packet *pkt,
> >                               uint32_t tp_id);
> >  static void delete_conn_cmn(struct conn *);
> >  static void delete_conn(struct conn *);
> > -static void delete_conn_one(struct conn *conn);
> >  static enum ct_update_res conn_update(struct conntrack *ct, struct conn *conn,
> >                                        struct dp_packet *pkt,
> >                                        struct conn_lookup_ctx *ctx,
> > @@ -108,8 +107,8 @@ static void set_label(struct dp_packet *, struct conn *,
> >  static void *clean_thread_main(void *f_);
> >
> >  static bool
> > -nat_select_range_tuple(struct conntrack *ct, const struct conn *conn,
> > -                       struct conn *nat_conn);
> > +nat_select_range_tuple(struct conntrack *ct, struct conn *conn,
> > +                       const struct nat_action_info_t *nat_action);
> >
> >  static uint8_t
> >  reverse_icmp_type(uint8_t type);
> > @@ -233,6 +232,7 @@ conn_key_cmp(const struct conn_key *key1, const struct conn_key *key2)
> >      return 1;
> >  }
> >
> > +#if 0
>
> Please remove functions which are no longer used.

OK.

>
> >  static void
> >  ct_print_conn_info(const struct conn *c, const char *log_msg,
> >                     enum vlog_level vll, bool force, bool rl_on)
> > @@ -287,6 +287,7 @@ ct_print_conn_info(const struct conn *c, const char *log_msg,
> >          }
> >      }
> >  }
> > +#endif
> >
> >  /* Initializes the connection tracker 'ct'.  The caller is responsible for
> >   * calling 'conntrack_destroy()', when the instance is not needed anymore */
> > @@ -437,7 +438,7 @@ conn_clean_cmn(struct conntrack *ct, struct conn *conn)
> >      }
> >
> >      uint32_t hash = conn_key_hash(&conn->key, ct->hash_basis);
> > -    cmap_remove(&ct->conns, &conn->cm_node, hash);
> > +    cmap_remove(&ct->conns, &conn->orig.cm_node, hash);
> >
> >      struct zone_limit *zl = zone_limit_lookup(ct, conn->admit_zone);
> >      if (zl && zl->czl.zone_limit_seq == conn->zone_limit_seq) {
> > @@ -451,12 +452,10 @@ static void
> >  conn_clean(struct conntrack *ct, struct conn *conn)
> >      OVS_REQUIRES(ct->ct_lock)
> >  {
> > -    ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT);
> > -
> >      conn_clean_cmn(ct, conn);
> > -    if (conn->nat_conn) {
> > -        uint32_t hash = conn_key_hash(&conn->nat_conn->key, ct->hash_basis);
> > -        cmap_remove(&ct->conns, &conn->nat_conn->cm_node, hash);
> > +    if (conn->conn_flags & CONN_FLAG_NAT_MASK) {
> > +        uint32_t hash = conn_key_hash(&conn->rev_key, ct->hash_basis);
> > +        cmap_remove(&ct->conns, &conn->rev.cm_node, hash);
> >      }
> >      ovs_list_remove(&conn->exp_node);
> >      conn->cleaned = true;
> > @@ -464,19 +463,6 @@ conn_clean(struct conntrack *ct, struct conn *conn)
> >      atomic_count_dec(&ct->n_conn);
> >  }
> >
> > -static void
> > -conn_clean_one(struct conntrack *ct, struct conn *conn)
> > -    OVS_REQUIRES(ct->ct_lock)
> > -{
> > -    conn_clean_cmn(ct, conn);
> > -    if (conn->conn_type == CT_CONN_TYPE_DEFAULT) {
> > -        ovs_list_remove(&conn->exp_node);
> > -        conn->cleaned = true;
> > -        atomic_count_dec(&ct->n_conn);
> > -    }
> > -    ovsrcu_postpone(delete_conn_one, conn);
> > -}
> > -
> >  /* Destroys the connection tracker 'ct' and frees all the allocated memory.
> >   * The caller of this function must already have shut down packet input
> >   * and PMD threads (which would have been quiesced).  */
> > @@ -488,9 +474,14 @@ conntrack_destroy(struct conntrack *ct)
> >      pthread_join(ct->clean_thread, NULL);
> >      latch_destroy(&ct->clean_thread_exit);
> >
> > +    struct conn_dir *conndir;
> >      ovs_mutex_lock(&ct->ct_lock);
> > -    CMAP_FOR_EACH (conn, cm_node, &ct->conns) {
> > -        conn_clean_one(ct, conn);
> > +    CMAP_FOR_EACH (conndir, cm_node, &ct->conns) {
> > +        if (!conndir->orig) {
> > +            continue;
> > +        }
> > +        conn = conn_from_conndir(conndir);
> > +        conn_clean(ct, conn);
> >      }
> >      cmap_destroy(&ct->conns);
> >
> > @@ -530,9 +521,11 @@ conn_key_lookup(struct conntrack *ct, const struct conn_key *key,
> >                  bool *reply)
> >  {
> >      struct conn *conn;
> > +    struct conn_dir *conndir;
> >      bool found = false;
> >
> > -    CMAP_FOR_EACH_WITH_HASH (conn, cm_node, hash, &ct->conns) {
> > +    CMAP_FOR_EACH_WITH_HASH (conndir, cm_node, hash, &ct->conns) {
> > +        conn = conn_from_conndir(conndir);
> >          if (!conn_key_cmp(&conn->key, key) && !conn_expired(conn, now)) {
> >              found = true;
> >              if (reply) {
> > @@ -710,7 +703,7 @@ handle_alg_ctl(struct conntrack *ct, const struct conn_lookup_ctx *ctx,
> >  static void
> >  pat_packet(struct dp_packet *pkt, const struct conn *conn)
> >  {
> > -    if (conn->nat_info->nat_action & NAT_ACTION_SRC) {
> > +    if (conn->conn_flags & NAT_ACTION_SRC) {
> >          if (conn->key.nw_proto == IPPROTO_TCP) {
> >              struct tcp_header *th = dp_packet_l4(pkt);
> >              packet_set_tcp_port(pkt, conn->rev_key.dst.port, th->tcp_dst);
> > @@ -718,7 +711,7 @@ pat_packet(struct dp_packet *pkt, const struct conn *conn)
> >              struct udp_header *uh = dp_packet_l4(pkt);
> >              packet_set_udp_port(pkt, conn->rev_key.dst.port, uh->udp_dst);
> >          }
> > -    } else if (conn->nat_info->nat_action & NAT_ACTION_DST) {
> > +    } else if (conn->conn_flags & NAT_ACTION_DST) {
> >          if (conn->key.nw_proto == IPPROTO_TCP) {
> >              struct tcp_header *th = dp_packet_l4(pkt);
> >              packet_set_tcp_port(pkt, th->tcp_src, conn->rev_key.src.port);
> > @@ -732,7 +725,7 @@ pat_packet(struct dp_packet *pkt, const struct conn *conn)
> >  static void
> >  nat_packet(struct dp_packet *pkt, const struct conn *conn, bool related)
> >  {
> > -    if (conn->nat_info->nat_action & NAT_ACTION_SRC) {
> > +    if (conn->conn_flags & NAT_ACTION_SRC) {
> >          pkt->md.ct_state |= CS_SRC_NAT;
> >          if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
> >              struct ip_header *nh = dp_packet_l3(pkt);
> > @@ -747,7 +740,7 @@ nat_packet(struct dp_packet *pkt, const struct conn *conn, bool related)
> >          if (!related) {
> >              pat_packet(pkt, conn);
> >          }
> > -    } else if (conn->nat_info->nat_action & NAT_ACTION_DST) {
> > +    } else if (conn->conn_flags & NAT_ACTION_DST) {
> >          pkt->md.ct_state |= CS_DST_NAT;
> >          if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
> >              struct ip_header *nh = dp_packet_l3(pkt);
> > @@ -768,7 +761,7 @@ nat_packet(struct dp_packet *pkt, const struct conn *conn, bool related)
> >  static void
> >  un_pat_packet(struct dp_packet *pkt, const struct conn *conn)
> >  {
> > -    if (conn->nat_info->nat_action & NAT_ACTION_SRC) {
> > +    if (conn->conn_flags & NAT_ACTION_SRC) {
> >          if (conn->key.nw_proto == IPPROTO_TCP) {
> >              struct tcp_header *th = dp_packet_l4(pkt);
> >              packet_set_tcp_port(pkt, th->tcp_src, conn->key.src.port);
> > @@ -776,7 +769,7 @@ un_pat_packet(struct dp_packet *pkt, const struct conn *conn)
> >              struct udp_header *uh = dp_packet_l4(pkt);
> >              packet_set_udp_port(pkt, uh->udp_src, conn->key.src.port);
> >          }
> > -    } else if (conn->nat_info->nat_action & NAT_ACTION_DST) {
> > +    } else if (conn->conn_flags & NAT_ACTION_DST) {
> >          if (conn->key.nw_proto == IPPROTO_TCP) {
> >              struct tcp_header *th = dp_packet_l4(pkt);
> >              packet_set_tcp_port(pkt, conn->key.dst.port, th->tcp_dst);
> > @@ -790,7 +783,7 @@ un_pat_packet(struct dp_packet *pkt, const struct conn *conn)
> >  static void
> >  reverse_pat_packet(struct dp_packet *pkt, const struct conn *conn)
> >  {
> > -    if (conn->nat_info->nat_action & NAT_ACTION_SRC) {
> > +    if (conn->conn_flags & NAT_ACTION_SRC) {
> >          if (conn->key.nw_proto == IPPROTO_TCP) {
> >              struct tcp_header *th_in = dp_packet_l4(pkt);
> >              packet_set_tcp_port(pkt, conn->key.src.port,
> > @@ -800,7 +793,7 @@ reverse_pat_packet(struct dp_packet *pkt, const struct conn *conn)
> >              packet_set_udp_port(pkt, conn->key.src.port,
> >                                  uh_in->udp_dst);
> >          }
> > -    } else if (conn->nat_info->nat_action & NAT_ACTION_DST) {
> > +    } else if (conn->conn_flags & NAT_ACTION_DST) {
> >          if (conn->key.nw_proto == IPPROTO_TCP) {
> >              struct tcp_header *th_in = dp_packet_l4(pkt);
> >              packet_set_tcp_port(pkt, th_in->tcp_src,
> > @@ -834,10 +827,10 @@ reverse_nat_packet(struct dp_packet *pkt, const struct conn *conn)
> >          pkt->l3_ofs += (char *) inner_l3 - (char *) nh;
> >          pkt->l4_ofs += inner_l4 - (char *) icmp;
> >
> > -        if (conn->nat_info->nat_action & NAT_ACTION_SRC) {
> > +        if (conn->conn_flags & NAT_ACTION_SRC) {
> >              packet_set_ipv4_addr(pkt, &inner_l3->ip_src,
> >                                   conn->key.src.addr.ipv4);
> > -        } else if (conn->nat_info->nat_action & NAT_ACTION_DST) {
> > +        } else if (conn->conn_flags & NAT_ACTION_DST) {
> >              packet_set_ipv4_addr(pkt, &inner_l3->ip_dst,
> >                                   conn->key.dst.addr.ipv4);
> >          }
> > @@ -858,11 +851,11 @@ reverse_nat_packet(struct dp_packet *pkt, const struct conn *conn)
> >          pkt->l3_ofs += (char *) inner_l3_6 - (char *) nh6;
> >          pkt->l4_ofs += inner_l4 - (char *) icmp6;
> >
> > -        if (conn->nat_info->nat_action & NAT_ACTION_SRC) {
> > +        if (conn->conn_flags & NAT_ACTION_SRC) {
> >              packet_set_ipv6_addr(pkt, conn->key.nw_proto,
> >                                   inner_l3_6->ip6_src.be32,
> >                                   &conn->key.src.addr.ipv6, true);
> > -        } else if (conn->nat_info->nat_action & NAT_ACTION_DST) {
> > +        } else if (conn->conn_flags & NAT_ACTION_DST) {
> >              packet_set_ipv6_addr(pkt, conn->key.nw_proto,
> >                                   inner_l3_6->ip6_dst.be32,
> >                                   &conn->key.dst.addr.ipv6, true);
> > @@ -877,10 +870,9 @@ reverse_nat_packet(struct dp_packet *pkt, const struct conn *conn)
> >  }
> >
> >  static void
> > -un_nat_packet(struct dp_packet *pkt, const struct conn *conn,
> > -              bool related)
> > +un_nat_packet(struct dp_packet *pkt, const struct conn *conn, bool related)
> >  {
> > -    if (conn->nat_info->nat_action & NAT_ACTION_SRC) {
> > +    if (conn->conn_flags & NAT_ACTION_SRC) {
> >          pkt->md.ct_state |= CS_DST_NAT;
> >          if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
> >              struct ip_header *nh = dp_packet_l3(pkt);
> > @@ -898,7 +890,7 @@ un_nat_packet(struct dp_packet *pkt, const struct conn *conn,
> >          } else {
> >              un_pat_packet(pkt, conn);
> >          }
> > -    } else if (conn->nat_info->nat_action & NAT_ACTION_DST) {
> > +    } else if (conn->conn_flags & NAT_ACTION_DST) {
> >          pkt->md.ct_state |= CS_SRC_NAT;
> >          if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
> >              struct ip_header *nh = dp_packet_l3(pkt);
> > @@ -964,7 +956,6 @@ conn_not_found(struct conntrack *ct, struct dp_packet *pkt,
> >      OVS_REQUIRES(ct->ct_lock)
> >  {
> >      struct conn *nc = NULL;
> > -    struct conn *nat_conn = NULL;
> >
> >      if (!valid_new(pkt, &ctx->key)) {
> >          pkt->md.ct_state = CS_INVALID;
> > @@ -995,6 +986,8 @@ conn_not_found(struct conntrack *ct, struct dp_packet *pkt,
> >          memcpy(&nc->key, &ctx->key, sizeof nc->key);
> >          memcpy(&nc->rev_key, &nc->key, sizeof nc->rev_key);
> >          conn_key_reverse(&nc->rev_key);
> > +        nc->orig.orig = true;
> > +        nc->rev.orig = !nc->orig.orig;
> >
> >          if (ct_verify_helper(helper, ct_alg_ctl)) {
> >              nc->alg = nullable_xstrdup(helper);
> > @@ -1008,45 +1001,29 @@ conn_not_found(struct conntrack *ct, struct dp_packet *pkt,
> >          }
> >
> >          if (nat_action_info) {
> > -            nc->nat_info = xmemdup(nat_action_info, sizeof *nc->nat_info);
> > -            nat_conn = xzalloc(sizeof *nat_conn);
> > -
> >              if (alg_exp) {
> >                  if (alg_exp->nat_rpl_dst) {
> >                      nc->rev_key.dst.addr = alg_exp->alg_nat_repl_addr;
> > -                    nc->nat_info->nat_action = NAT_ACTION_SRC;
> > +                    nc->conn_flags |= NAT_ACTION_SRC;
> >                  } else {
> >                      nc->rev_key.src.addr = alg_exp->alg_nat_repl_addr;
> > -                    nc->nat_info->nat_action = NAT_ACTION_DST;
> > +                    nc->conn_flags |= NAT_ACTION_DST;
> >                  }
> >              } else {
> > -                memcpy(nat_conn, nc, sizeof *nat_conn);
> > -                bool nat_res = nat_select_range_tuple(ct, nc, nat_conn);
> > +                bool nat_res = nat_select_range_tuple(ct, nc, nat_action_info);
> >
> >                  if (!nat_res) {
> >                      goto nat_res_exhaustion;
> >                  }
> > -
> > -                /* Update nc with nat adjustments made to nat_conn by
> > -                 * nat_select_range_tuple(). */
> > -                memcpy(nc, nat_conn, sizeof *nc);
> >              }
> >
> >              nat_packet(pkt, nc, ctx->icmp_related);
> > -            memcpy(&nat_conn->key, &nc->rev_key, sizeof nat_conn->key);
> > -            memcpy(&nat_conn->rev_key, &nc->key, sizeof nat_conn->rev_key);
> > -            nat_conn->conn_type = CT_CONN_TYPE_UN_NAT;
> > -            nat_conn->nat_info = NULL;
> > -            nat_conn->alg = NULL;
> > -            nat_conn->nat_conn = NULL;
> > -            uint32_t nat_hash = conn_key_hash(&nat_conn->key, ct->hash_basis);
> > -            cmap_insert(&ct->conns, &nat_conn->cm_node, nat_hash);
> > +            uint32_t nat_hash = conn_key_hash(&nc->rev_key, ct->hash_basis);
> > +            cmap_insert(&ct->conns, &nc->rev.cm_node, nat_hash);
> >          }
> >
> > -        nc->nat_conn = nat_conn;
> >          ovs_mutex_init_adaptive(&nc->lock);
> > -        nc->conn_type = CT_CONN_TYPE_DEFAULT;
> > -        cmap_insert(&ct->conns, &nc->cm_node, ctx->hash);
> > +        cmap_insert(&ct->conns, &nc->orig.cm_node, ctx->hash);
> >          atomic_count_inc(&ct->n_conn);
> >          ctx->conn = nc; /* For completeness. */
> >          if (zl) {
> > @@ -1066,7 +1043,6 @@ conn_not_found(struct conntrack *ct, struct dp_packet *pkt,
> >       * firewall rules or a separate firewall.  Also using zone partitioning
> >       * can limit DoS impact. */
> >  nat_res_exhaustion:
> > -    free(nat_conn);
> >      ovs_list_remove(&nc->exp_node);
> >      delete_conn_cmn(nc);
> >      static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5);
> > @@ -1080,7 +1056,6 @@ conn_update_state(struct conntrack *ct, struct dp_packet *pkt,
> >                    struct conn_lookup_ctx *ctx, struct conn *conn,
> >                    long long now)
> >  {
> > -    ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT);
> >      bool create_new_conn = false;
> >
> >      if (ctx->icmp_related) {
> > @@ -1128,7 +1103,7 @@ static void
> >  handle_nat(struct dp_packet *pkt, struct conn *conn,
> >             uint16_t zone, bool reply, bool related)
> >  {
> > -    if (conn->nat_info &&
> > +    if (conn->conn_flags & CONN_FLAG_NAT_MASK &&
> >          (!(pkt->md.ct_state & (CS_SRC_NAT | CS_DST_NAT)) ||
> >            (pkt->md.ct_state & (CS_SRC_NAT | CS_DST_NAT) &&
> >             zone != pkt->md.ct_zone))) {
> > @@ -1301,25 +1276,6 @@ process_one(struct conntrack *ct, struct dp_packet *pkt,
> >          conn = NULL;
> >      }
> >
> > -    if (OVS_LIKELY(conn)) {
> > -        if (conn->conn_type == CT_CONN_TYPE_UN_NAT) {
> > -
> > -            ctx->reply = true;
> > -            struct conn *rev_conn = conn;  /* Save for debugging. */
> > -            uint32_t hash = conn_key_hash(&conn->rev_key, ct->hash_basis);
> > -            conn_key_lookup(ct, &ctx->key, hash, now, &conn, &ctx->reply);
> > -
> > -            if (!conn) {
> > -                pkt->md.ct_state |= CS_INVALID;
> > -                write_ct_md(pkt, zone, NULL, NULL, NULL);
> > -                char *log_msg = xasprintf("Missing parent conn %p", rev_conn);
> > -                ct_print_conn_info(rev_conn, log_msg, VLL_INFO, true, true);
> > -                free(log_msg);
> > -                return;
> > -            }
> > -        }
> > -    }
> > -
> >      enum ct_alg_ctl_type ct_alg_ctl = get_alg_ctl_type(pkt, tp_src, tp_dst,
> >                                                         helper);
> >
> > @@ -2114,12 +2070,13 @@ conn_key_reverse(struct conn_key *key)
> >  }
> >
> >  static uint32_t
> > -nat_ipv6_addrs_delta(struct in6_addr *ipv6_min, struct in6_addr *ipv6_max)
> > +nat_ipv6_addrs_delta(const struct in6_addr *ipv6_min,
> > +                     const struct in6_addr *ipv6_max)
> >  {
> > -    uint8_t *ipv6_min_hi = &ipv6_min->s6_addr[0];
> > -    uint8_t *ipv6_min_lo = &ipv6_min->s6_addr[0] +  sizeof(uint64_t);
> > -    uint8_t *ipv6_max_hi = &ipv6_max->s6_addr[0];
> > -    uint8_t *ipv6_max_lo = &ipv6_max->s6_addr[0] + sizeof(uint64_t);
> > +    const uint8_t *ipv6_min_hi = &ipv6_min->s6_addr[0];
> > +    const uint8_t *ipv6_min_lo = &ipv6_min->s6_addr[0] +  sizeof(uint64_t);
> > +    const uint8_t *ipv6_max_hi = &ipv6_max->s6_addr[0];
> > +    const uint8_t *ipv6_max_lo = &ipv6_max->s6_addr[0] + sizeof(uint64_t);
>
> Why these const changes now?  This should be a separate patch.
>
> >      ovs_be64 addr6_64_min_hi;
> >      ovs_be64 addr6_64_min_lo;
> > @@ -2180,15 +2137,17 @@ nat_ipv6_addr_increment(struct in6_addr *ipv6, uint32_t increment)
> >  }
> >
> >  static uint32_t
> > -nat_range_hash(const struct conn *conn, uint32_t basis)
> > +nat_range_hash(const struct conn *conn,
> > +               const struct nat_action_info_t *nat_info,
> > +               uint32_t basis)
> >  {
> >      uint32_t hash = basis;
> >
> > -    hash = ct_addr_hash_add(hash, &conn->nat_info->min_addr);
> > -    hash = ct_addr_hash_add(hash, &conn->nat_info->max_addr);
> > +    hash = ct_addr_hash_add(hash, &nat_info->min_addr);
> > +    hash = ct_addr_hash_add(hash, &nat_info->max_addr);
> >      hash = hash_add(hash,
> > -                    (conn->nat_info->max_port << 16)
> > -                    | conn->nat_info->min_port);
> > +                    (nat_info->max_port << 16)
> > +                    | nat_info->min_port);
> >      hash = ct_endpoint_hash_add(hash, &conn->key.src);
> >      hash = ct_endpoint_hash_add(hash, &conn->key.dst);
> >      hash = hash_add(hash, (OVS_FORCE uint32_t) conn->key.dl_type);
> > @@ -2202,8 +2161,8 @@ nat_range_hash(const struct conn *conn, uint32_t basis)
> >  }
> >
> >  static bool
> > -nat_select_range_tuple(struct conntrack *ct, const struct conn *conn,
> > -                       struct conn *nat_conn)
> > +nat_select_range_tuple(struct conntrack *ct, struct conn *conn,
> > +                       const struct nat_action_info_t *nat_info)
> >  {
> >      enum { MIN_NAT_EPHEMERAL_PORT = 1024,
> >             MAX_NAT_EPHEMERAL_PORT = 65535 };
> > @@ -2211,25 +2170,26 @@ nat_select_range_tuple(struct conntrack *ct, const struct conn *conn,
> >      uint16_t min_port;
> >      uint16_t max_port;
> >      uint16_t first_port;
> > -    uint32_t hash = nat_range_hash(conn, ct->hash_basis);
> > +    uint32_t hash = nat_range_hash(conn, nat_info, ct->hash_basis);
> >
> > -    if ((conn->nat_info->nat_action & NAT_ACTION_SRC) &&
> > -        (!(conn->nat_info->nat_action & NAT_ACTION_SRC_PORT))) {
> > +    if ((nat_info->nat_action & NAT_ACTION_SRC) &&
> > +        (!(nat_info->nat_action & NAT_ACTION_SRC_PORT))) {
> >          min_port = ntohs(conn->key.src.port);
> >          max_port = ntohs(conn->key.src.port);
> >          first_port = min_port;
> > -    } else if ((conn->nat_info->nat_action & NAT_ACTION_DST) &&
> > -               (!(conn->nat_info->nat_action & NAT_ACTION_DST_PORT))) {
> > +    } else if ((nat_info->nat_action & NAT_ACTION_DST) &&
> > +               (!(nat_info->nat_action & NAT_ACTION_DST_PORT))) {
> >          min_port = ntohs(conn->key.dst.port);
> >          max_port = ntohs(conn->key.dst.port);
> >          first_port = min_port;
> >      } else {
> > -        uint16_t deltap = conn->nat_info->max_port - conn->nat_info->min_port;
> > +        uint16_t deltap = nat_info->max_port - nat_info->min_port;
> >          uint32_t port_index = hash % (deltap + 1);
> > -        first_port = conn->nat_info->min_port + port_index;
> > -        min_port = conn->nat_info->min_port;
> > -        max_port = conn->nat_info->max_port;
> > +        first_port = nat_info->min_port + port_index;
> > +        min_port = nat_info->min_port;
> > +        max_port = nat_info->max_port;
> >      }
> > +    conn->conn_flags |= nat_info->nat_action;
> >
> >      uint32_t deltaa = 0;
> >      uint32_t address_index;
> > @@ -2237,25 +2197,25 @@ nat_select_range_tuple(struct conntrack *ct, const struct conn *conn,
> >      memset(&ct_addr, 0, sizeof ct_addr);
> >      union ct_addr max_ct_addr;
> >      memset(&max_ct_addr, 0, sizeof max_ct_addr);
> > -    max_ct_addr = conn->nat_info->max_addr;
> > +    max_ct_addr = nat_info->max_addr;
> >
> >      if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
> > -        deltaa = ntohl(conn->nat_info->max_addr.ipv4) -
> > -                 ntohl(conn->nat_info->min_addr.ipv4);
> > +        deltaa = ntohl(nat_info->max_addr.ipv4) -
> > +                 ntohl(nat_info->min_addr.ipv4);
> >          address_index = hash % (deltaa + 1);
> >          ct_addr.ipv4 = htonl(
> > -            ntohl(conn->nat_info->min_addr.ipv4) + address_index);
> > +            ntohl(nat_info->min_addr.ipv4) + address_index);
> >      } else {
> > -        deltaa = nat_ipv6_addrs_delta(&conn->nat_info->min_addr.ipv6,
> > -                                      &conn->nat_info->max_addr.ipv6);
> > +        deltaa = nat_ipv6_addrs_delta(&nat_info->min_addr.ipv6,
> > +                                      &nat_info->max_addr.ipv6);
> >          /* deltaa must be within 32 bits for full hash coverage. A 64 or
> >           * 128 bit hash is unnecessary and hence not used here. Most code
> >           * is kept common with V4; nat_ipv6_addrs_delta() will do the
> >           * enforcement via max_ct_addr. */
> > -        max_ct_addr = conn->nat_info->min_addr;
> > +        max_ct_addr = nat_info->min_addr;
> >          nat_ipv6_addr_increment(&max_ct_addr.ipv6, deltaa);
> >          address_index = hash % (deltaa + 1);
> > -        ct_addr.ipv6 = conn->nat_info->min_addr.ipv6;
> > +        ct_addr.ipv6 = nat_info->min_addr.ipv6;
> >          nat_ipv6_addr_increment(&ct_addr.ipv6, address_index);
> >      }
> >
> > @@ -2263,27 +2223,27 @@ nat_select_range_tuple(struct conntrack *ct, const struct conn *conn,
> >      bool all_ports_tried = false;
> >      /* For DNAT or for specified port ranges, we don't use ephemeral ports. */
> >      bool ephemeral_ports_tried
> > -        = conn->nat_info->nat_action & NAT_ACTION_DST ||
> > -              conn->nat_info->nat_action & NAT_ACTION_SRC_PORT
> > +        = nat_info->nat_action & NAT_ACTION_DST ||
> > +              nat_info->nat_action & NAT_ACTION_SRC_PORT
> >            ? true : false;
> >      union ct_addr first_addr = ct_addr;
> >      bool pat_enabled = conn->key.nw_proto != IPPROTO_ICMP &&
> >                         conn->key.nw_proto != IPPROTO_ICMPV6;
> >
> >      while (true) {
> > -        if (conn->nat_info->nat_action & NAT_ACTION_SRC) {
> > -            nat_conn->rev_key.dst.addr = ct_addr;
> > +        if (nat_info->nat_action & NAT_ACTION_SRC) {
> > +            conn->rev_key.dst.addr = ct_addr;
> >              if (pat_enabled) {
> > -                nat_conn->rev_key.dst.port = htons(port);
> > +                conn->rev_key.dst.port = htons(port);
> >              }
> >          } else {
> > -            nat_conn->rev_key.src.addr = ct_addr;
> > +            conn->rev_key.src.addr = ct_addr;
> >              if (pat_enabled) {
> > -                nat_conn->rev_key.src.port = htons(port);
> > +                conn->rev_key.src.port = htons(port);
> >              }
> >          }
> >
> > -        bool found = conn_lookup(ct, &nat_conn->rev_key, time_msec(), NULL,
> > +        bool found = conn_lookup(ct, &conn->rev_key, time_msec(), NULL,
> >                                   NULL);
> >          if (!found) {
> >              return true;
> > @@ -2306,12 +2266,12 @@ nat_select_range_tuple(struct conntrack *ct, const struct conn *conn,
> >                      nat_ipv6_addr_increment(&ct_addr.ipv6, 1);
> >                  }
> >              } else {
> > -                ct_addr = conn->nat_info->min_addr;
> > +                ct_addr = nat_info->min_addr;
> >              }
> >              if (!memcmp(&ct_addr, &first_addr, sizeof ct_addr)) {
> >                  if (pat_enabled && !ephemeral_ports_tried) {
> >                      ephemeral_ports_tried = true;
> > -                    ct_addr = conn->nat_info->min_addr;
> > +                    ct_addr = nat_info->min_addr;
> >                      first_addr = ct_addr;
> >                      min_port = MIN_NAT_EPHEMERAL_PORT;
> >                      max_port = MAX_NAT_EPHEMERAL_PORT;
> > @@ -2324,6 +2284,7 @@ nat_select_range_tuple(struct conntrack *ct, const struct conn *conn,
> >              all_ports_tried = false;
> >          }
> >      }
> > +    conn->conn_flags &= ~CONN_FLAG_NAT_MASK;
> >      return false;
> >  }
> >
> > @@ -2342,13 +2303,10 @@ conn_update(struct conntrack *ct, struct conn *conn, struct dp_packet *pkt,
> >  static bool
> >  conn_expired(struct conn *conn, long long now)
> >  {
> > -    if (conn->conn_type == CT_CONN_TYPE_DEFAULT) {
> > -        ovs_mutex_lock(&conn->lock);
> > -        bool expired = now >= conn->expiration ? true : false;
> > -        ovs_mutex_unlock(&conn->lock);
> > -        return expired;
> > -    }
> > -    return false;
> > +    ovs_mutex_lock(&conn->lock);
> > +    bool expired = now >= conn->expiration ? true : false;
> > +    ovs_mutex_unlock(&conn->lock);
> > +    return expired;
> >  }
> >
> >  static bool
> > @@ -2367,7 +2325,6 @@ new_conn(struct conntrack *ct, struct dp_packet *pkt, struct conn_key *key,
> >  static void
> >  delete_conn_cmn(struct conn *conn)
> >  {
> > -    free(conn->nat_info);
> >      free(conn->alg);
> >      free(conn);
> >  }
> > @@ -2375,22 +2332,10 @@ delete_conn_cmn(struct conn *conn)
> >  static void
> >  delete_conn(struct conn *conn)
> >  {
> > -    ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT);
> >      ovs_mutex_destroy(&conn->lock);
> > -    free(conn->nat_conn);
> >      delete_conn_cmn(conn);
> >  }
> >
> > -/* Only used by conn_clean_one(). */
> > -static void
> > -delete_conn_one(struct conn *conn)
> > -{
> > -    if (conn->conn_type == CT_CONN_TYPE_DEFAULT) {
> > -        ovs_mutex_destroy(&conn->lock);
> > -    }
> > -    delete_conn_cmn(conn);
> > -}
> > -
> >  /* Convert a conntrack address 'a' into an IP address 'b' based on 'dl_type'.
> >   *
> >   * Note that 'dl_type' should be either "ETH_TYPE_IP" or "ETH_TYPE_IPv6"
> > @@ -2539,10 +2484,14 @@ conntrack_dump_next(struct conntrack_dump *dump, struct ct_dpif_entry *entry)
> >          if (!cm_node) {
> >              break;
> >          }
> > -        struct conn *conn;
> > -        INIT_CONTAINER(conn, cm_node, cm_node);
> > -        if ((!dump->filter_zone || conn->key.zone == dump->zone) &&
> > -            (conn->conn_type != CT_CONN_TYPE_UN_NAT)) {
> > +        struct conn_dir *conndir;
> > +        struct conn* conn;
> > +        INIT_CONTAINER(conndir, cm_node, cm_node);
> > +        if (!conndir->orig) {
> > +            continue;
> > +        }
> > +        conn = conn_from_conndir(conndir);
> > +        if ((!dump->filter_zone || conn->key.zone == dump->zone)) {
> >              conn_to_ct_dpif_entry(conn, entry, now);
> >              return 0;
> >          }
> > @@ -2561,11 +2510,16 @@ int
> >  conntrack_flush(struct conntrack *ct, const uint16_t *zone)
> >  {
> >      struct conn *conn;
> > +    struct conn_dir *conndir;
> >
> >      ovs_mutex_lock(&ct->ct_lock);
> > -    CMAP_FOR_EACH (conn, cm_node, &ct->conns) {
> > +    CMAP_FOR_EACH (conndir, cm_node, &ct->conns) {
> > +        if (!conndir->orig) {
> > +            continue;
> > +        }
> > +        conn = conn_from_conndir(conndir);
> >          if (!zone || *zone == conn->key.zone) {
> > -            conn_clean_one(ct, conn);
> > +            conn_clean(ct, conn);
> >          }
> >      }
> >      ovs_mutex_unlock(&ct->ct_lock);
> > @@ -2586,7 +2540,7 @@ conntrack_flush_tuple(struct conntrack *ct, const struct ct_dpif_tuple *tuple,
> >      ovs_mutex_lock(&ct->ct_lock);
> >      conn_lookup(ct, &key, time_msec(), &conn, NULL);
> >
> > -    if (conn && conn->conn_type == CT_CONN_TYPE_DEFAULT) {
> > +    if (conn) {
> >          conn_clean(ct, conn);
> >      } else {
> >          VLOG_WARN("Must flush tuple using the original pre-NATed tuple");
> > @@ -2744,8 +2698,7 @@ expectation_create(struct conntrack *ct, ovs_be16 dst_port,
> >          alg_exp_node->nat_rpl_dst = true;
> >          if (skip_nat) {
> >              alg_nat_repl_addr = dst_addr;
> > -        } else if (parent_conn->nat_info &&
> > -                   parent_conn->nat_info->nat_action & NAT_ACTION_DST) {
> > +        } else if (parent_conn->conn_flags & NAT_ACTION_DST) {
> >              alg_nat_repl_addr = parent_conn->rev_key.src.addr;
> >              alg_exp_node->nat_rpl_dst = false;
> >          } else {
> > @@ -2757,8 +2710,7 @@ expectation_create(struct conntrack *ct, ovs_be16 dst_port,
> >          alg_exp_node->nat_rpl_dst = false;
> >          if (skip_nat) {
> >              alg_nat_repl_addr = src_addr;
> > -        } else if (parent_conn->nat_info &&
> > -                   parent_conn->nat_info->nat_action & NAT_ACTION_DST) {
> > +        } else if (parent_conn->conn_flags & NAT_ACTION_DST) {
> >              alg_nat_repl_addr = parent_conn->key.dst.addr;
> >              alg_exp_node->nat_rpl_dst = true;
> >          } else {
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Gaetan Rivet March 2, 2021, 5:45 p.m. UTC | #4
On Sat, Feb 27, 2021, at 09:49, 贺鹏 wrote:
> Hi,
> 
> Thanks for the detailed reviews.
> These patches are like RFC to see if the work is interesting enough.
> 
> Aaron Conole <aconole@redhat.com> 于2021年2月25日周四 上午1:37写道:
> >
> > "hepeng.0320" <hepeng.0320@bytedance.com> writes:
> >
> > > From: hepeng <hepeng.0320@bytedance.com>
> > >
> > > Currently, when doing NAT, the userspace conntrack will use an extra
> > > conn for the two directions in a flow. However, each conn has actually
> > > the two keys for both orig and rev flow directions. This patch
> > > introduces a conn_dir member in the conn and it consists of both rev and
> > > orig cmap_node for hash lookup. This saves extra allocation for
> > > nat_conn, and makes userspace code much cleaner.
> >
> > If I'm understanding this correctly, you still re-insert the conn into
> > the conn list?
> 
> 
> Yes. This is to insert the rev key into the cmap also. So for the nat traffic,
> both orig and rev key are in the cmap.
> 
> >
> > > We also introduces a conn_flags member to reduce the memory footprint of a
> > > conn.
> >
> > We should split this out to a separate patch.
> >
> > > Signed-off-by: Peng He <hepeng.0320@bytedance.com>
> > > ---
> > >  lib/conntrack-private.h |  20 +--
> > >  lib/conntrack.c         | 264 ++++++++++++++++------------------------
> > >  2 files changed, 121 insertions(+), 163 deletions(-)
> > >
> > > diff --git a/lib/conntrack-private.h b/lib/conntrack-private.h
> > > index 789af82ff..6bec43d3f 100644
> > > --- a/lib/conntrack-private.h
> > > +++ b/lib/conntrack-private.h
> > > @@ -83,21 +83,23 @@ struct alg_exp_node {
> > >      bool nat_rpl_dst;
> > >  };
> > >
> > > -enum OVS_PACKED_ENUM ct_conn_type {
> > > -    CT_CONN_TYPE_DEFAULT,
> > > -    CT_CONN_TYPE_UN_NAT,
> > > +#define CONN_FLAG_NAT_MASK 0xf
> > > +
> > > +struct conn_dir {
> > > +    struct cmap_node cm_node;
> > > +    bool orig;
> >
> > This naming is confusing.  We can have 'conn->orig->orig'.  Consider
> > renaming one of these fields to distinguish what is going on.  I would
> > prefer seeing 'fwd' used (since it's the forward direction).
> 
> Yes, agree.
> 
> >
> > >  };
> > >
> > >  struct conn {
> > >      /* Immutable data. */
> > >      struct conn_key key;
> > > +    struct conn_dir orig;
> > >      struct conn_key rev_key;
> > > +    struct conn_dir rev;
> >
> > I think this might be better if setup like:
> > +enum ct_direction {
> > +    CT_DIRECTION_FWD,
> > +    CT_DIRECTION_REV,
> > +    CT_DIRECTIONS
> > +}
> > +
> > +struct conn_data {
> > +    struct conn_key key;
> > +    struct cmap_node cm_node;
> >  };
> >
> >  struct conn {
> > -    struct conn_key key;
> > -    struct conn_key rev_key;
> > +    struct conn_data cn_data[CT_DIRECTIONS];
> > ...
> >
> > Then in the code, we can always get orig and rev information:
> >
> >   conn->cn_data[CT_DIRECTION_FWD]
> >   conn->cn_data[CT_DIRECTION_REV]
> >
> > Did I miss something?
> 
> Since both origin and rev are in the cmap, when you lookup the hash table,
> you get a pointer to the  'middle data structure', in the patch, it is
> the conn_dir.
> 
> However, you are not sure what you get is the origin dir or the rev dir. The key
> is to use a variable stored in the conn_dir, like 'fwd' or 'orig'.
> Then you know the dir,
> by knowing the dir, you know the offset between your pointer and the pointer to
> the conn, just like the belowing code:
> 
> static inline struct conn * conn_from_conndir(struct conn_dir *conndir) {
>     return conndir->orig ? CONTAINER_OF(conndir, struct conn, orig) : \
>                            CONTAINER_OF(conndir, struct conn, rev);
> }
> 
> In your case:
> 
> static inline struct conn * conn_from_conndir(struct conn_dir *conndir) {
>     return conndir->orig ? CONTAINER_OF(conndir, struct conn, cn_data[FWD]) : \
>                            CONTAINER_OF(conndir, struct conn, cn_data[REV]);
> }
> 
> In the following patches I've changed the code of conn_dir as a part
> of conn_key....

I haven't seen it in the following patches, I see in the last commit still struct conn_dir on its own, and not as part of conn_key. Maybe I misunderstood what you meant?

Also, logically I'd consider the conn_key as part of the conn_dir, not the inverse. I would expect conn_key to contain the whole key and only the key of the conn to compute its hash, no other elements such as a dir mark or a cmap node.

would it make sense to have something like

struct conn_dir {
    enum ct_direction dir;
    struct conn_key key;
    struct cmap_node cm_node;
};

struct conn {
    [...]
    struct conn_dir dir[CT_DIRECTIONS];
};

So the same as what Aaron proposed, but with 'conn_data' renamed as 'conn_dir' and the enum ct_directions used to mark each conn_dir. Maybe such enum could be packed to restrict its size to one byte, making it equivalent to your boolean-based solution?
Peng He March 6, 2021, 2:45 a.m. UTC | #5
Gaëtan Rivet <grive@u256.net> 于2021年3月3日周三 上午1:46写道:
>
> On Sat, Feb 27, 2021, at 09:49, 贺鹏 wrote:
> > Hi,
> >
> > Thanks for the detailed reviews.
> > These patches are like RFC to see if the work is interesting enough.
> >
> > Aaron Conole <aconole@redhat.com> 于2021年2月25日周四 上午1:37写道:
> > >
> > > "hepeng.0320" <hepeng.0320@bytedance.com> writes:
> > >
> > > > From: hepeng <hepeng.0320@bytedance.com>
> > > >
> > > > Currently, when doing NAT, the userspace conntrack will use an extra
> > > > conn for the two directions in a flow. However, each conn has actually
> > > > the two keys for both orig and rev flow directions. This patch
> > > > introduces a conn_dir member in the conn and it consists of both rev and
> > > > orig cmap_node for hash lookup. This saves extra allocation for
> > > > nat_conn, and makes userspace code much cleaner.
> > >
> > > If I'm understanding this correctly, you still re-insert the conn into
> > > the conn list?
> >
> >
> > Yes. This is to insert the rev key into the cmap also. So for the nat traffic,
> > both orig and rev key are in the cmap.
> >
> > >
> > > > We also introduces a conn_flags member to reduce the memory footprint of a
> > > > conn.
> > >
> > > We should split this out to a separate patch.
> > >
> > > > Signed-off-by: Peng He <hepeng.0320@bytedance.com>
> > > > ---
> > > >  lib/conntrack-private.h |  20 +--
> > > >  lib/conntrack.c         | 264 ++++++++++++++++------------------------
> > > >  2 files changed, 121 insertions(+), 163 deletions(-)
> > > >
> > > > diff --git a/lib/conntrack-private.h b/lib/conntrack-private.h
> > > > index 789af82ff..6bec43d3f 100644
> > > > --- a/lib/conntrack-private.h
> > > > +++ b/lib/conntrack-private.h
> > > > @@ -83,21 +83,23 @@ struct alg_exp_node {
> > > >      bool nat_rpl_dst;
> > > >  };
> > > >
> > > > -enum OVS_PACKED_ENUM ct_conn_type {
> > > > -    CT_CONN_TYPE_DEFAULT,
> > > > -    CT_CONN_TYPE_UN_NAT,
> > > > +#define CONN_FLAG_NAT_MASK 0xf
> > > > +
> > > > +struct conn_dir {
> > > > +    struct cmap_node cm_node;
> > > > +    bool orig;
> > >
> > > This naming is confusing.  We can have 'conn->orig->orig'.  Consider
> > > renaming one of these fields to distinguish what is going on.  I would
> > > prefer seeing 'fwd' used (since it's the forward direction).
> >
> > Yes, agree.
> >
> > >
> > > >  };
> > > >
> > > >  struct conn {
> > > >      /* Immutable data. */
> > > >      struct conn_key key;
> > > > +    struct conn_dir orig;
> > > >      struct conn_key rev_key;
> > > > +    struct conn_dir rev;
> > >
> > > I think this might be better if setup like:
> > > +enum ct_direction {
> > > +    CT_DIRECTION_FWD,
> > > +    CT_DIRECTION_REV,
> > > +    CT_DIRECTIONS
> > > +}
> > > +
> > > +struct conn_data {
> > > +    struct conn_key key;
> > > +    struct cmap_node cm_node;
> > >  };
> > >
> > >  struct conn {
> > > -    struct conn_key key;
> > > -    struct conn_key rev_key;
> > > +    struct conn_data cn_data[CT_DIRECTIONS];
> > > ...
> > >
> > > Then in the code, we can always get orig and rev information:
> > >
> > >   conn->cn_data[CT_DIRECTION_FWD]
> > >   conn->cn_data[CT_DIRECTION_REV]
> > >
> > > Did I miss something?
> >
> > Since both origin and rev are in the cmap, when you lookup the hash table,
> > you get a pointer to the  'middle data structure', in the patch, it is
> > the conn_dir.
> >
> > However, you are not sure what you get is the origin dir or the rev dir. The key
> > is to use a variable stored in the conn_dir, like 'fwd' or 'orig'.
> > Then you know the dir,
> > by knowing the dir, you know the offset between your pointer and the pointer to
> > the conn, just like the belowing code:
> >
> > static inline struct conn * conn_from_conndir(struct conn_dir *conndir) {
> >     return conndir->orig ? CONTAINER_OF(conndir, struct conn, orig) : \
> >                            CONTAINER_OF(conndir, struct conn, rev);
> > }
> >
> > In your case:
> >
> > static inline struct conn * conn_from_conndir(struct conn_dir *conndir) {
> >     return conndir->orig ? CONTAINER_OF(conndir, struct conn, cn_data[FWD]) : \
> >                            CONTAINER_OF(conndir, struct conn, cn_data[REV]);
> > }
> >
> > In the following patches I've changed the code of conn_dir as a part
> > of conn_key....
>
> I haven't seen it in the following patches, I see in the last commit still struct conn_dir on its own, and not as part of conn_key. Maybe I misunderstood what you meant?
>
> Also, logically I'd consider the conn_key as part of the conn_dir, not the inverse. I would expect conn_key to contain the whole key and only the key of the conn to compute its hash, no other elements such as a dir mark or a cmap node.
>
> would it make sense to have something like
>
> struct conn_dir {
>     enum ct_direction dir;
>     struct conn_key key;
>     struct cmap_node cm_node;
> };
>
> struct conn {
>     [...]
>     struct conn_dir dir[CT_DIRECTIONS];
> };
>
> So the same as what Aaron proposed, but with 'conn_data' renamed as 'conn_dir' and the enum ct_directions used to mark each conn_dir. Maybe such enum could be packed to restrict its size to one byte, making it equivalent to your boolean-based solution?

Yes, it's the same.

>
> --
> Gaetan Rivet
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
diff mbox series

Patch

diff --git a/lib/conntrack-private.h b/lib/conntrack-private.h
index 789af82ff..6bec43d3f 100644
--- a/lib/conntrack-private.h
+++ b/lib/conntrack-private.h
@@ -83,21 +83,23 @@  struct alg_exp_node {
     bool nat_rpl_dst;
 };
 
-enum OVS_PACKED_ENUM ct_conn_type {
-    CT_CONN_TYPE_DEFAULT,
-    CT_CONN_TYPE_UN_NAT,
+#define CONN_FLAG_NAT_MASK 0xf
+
+struct conn_dir {
+    struct cmap_node cm_node;
+    bool orig;
 };
 
 struct conn {
     /* Immutable data. */
     struct conn_key key;
+    struct conn_dir orig;
     struct conn_key rev_key;
+    struct conn_dir rev;
     struct conn_key parent_key; /* Only used for orig_tuple support. */
     struct ovs_list exp_node;
-    struct cmap_node cm_node;
-    struct nat_action_info_t *nat_info;
+    uint64_t conn_flags;
     char *alg;
-    struct conn *nat_conn; /* The NAT 'conn' context, if there is one. */
 
     /* Mutable data. */
     struct ovs_mutex lock; /* Guards all mutable fields. */
@@ -117,11 +119,15 @@  struct conn {
 
     /* Immutable data. */
     bool alg_related; /* True if alg data connection. */
-    enum ct_conn_type conn_type;
 
     uint32_t tp_id; /* Timeout policy ID. */
 };
 
+static inline struct conn * conn_from_conndir(struct conn_dir *conndir) {
+    return conndir->orig ? CONTAINER_OF(conndir, struct conn, orig) : \
+                           CONTAINER_OF(conndir, struct conn, rev);
+}
+
 enum ct_update_res {
     CT_UPDATE_INVALID,
     CT_UPDATE_VALID,
diff --git a/lib/conntrack.c b/lib/conntrack.c
index 930ed0be6..73d3a2fb2 100644
--- a/lib/conntrack.c
+++ b/lib/conntrack.c
@@ -94,7 +94,6 @@  static struct conn *new_conn(struct conntrack *ct, struct dp_packet *pkt,
                              uint32_t tp_id);
 static void delete_conn_cmn(struct conn *);
 static void delete_conn(struct conn *);
-static void delete_conn_one(struct conn *conn);
 static enum ct_update_res conn_update(struct conntrack *ct, struct conn *conn,
                                       struct dp_packet *pkt,
                                       struct conn_lookup_ctx *ctx,
@@ -108,8 +107,8 @@  static void set_label(struct dp_packet *, struct conn *,
 static void *clean_thread_main(void *f_);
 
 static bool
-nat_select_range_tuple(struct conntrack *ct, const struct conn *conn,
-                       struct conn *nat_conn);
+nat_select_range_tuple(struct conntrack *ct, struct conn *conn,
+                       const struct nat_action_info_t *nat_action);
 
 static uint8_t
 reverse_icmp_type(uint8_t type);
@@ -233,6 +232,7 @@  conn_key_cmp(const struct conn_key *key1, const struct conn_key *key2)
     return 1;
 }
 
+#if 0
 static void
 ct_print_conn_info(const struct conn *c, const char *log_msg,
                    enum vlog_level vll, bool force, bool rl_on)
@@ -287,6 +287,7 @@  ct_print_conn_info(const struct conn *c, const char *log_msg,
         }
     }
 }
+#endif
 
 /* Initializes the connection tracker 'ct'.  The caller is responsible for
  * calling 'conntrack_destroy()', when the instance is not needed anymore */
@@ -437,7 +438,7 @@  conn_clean_cmn(struct conntrack *ct, struct conn *conn)
     }
 
     uint32_t hash = conn_key_hash(&conn->key, ct->hash_basis);
-    cmap_remove(&ct->conns, &conn->cm_node, hash);
+    cmap_remove(&ct->conns, &conn->orig.cm_node, hash);
 
     struct zone_limit *zl = zone_limit_lookup(ct, conn->admit_zone);
     if (zl && zl->czl.zone_limit_seq == conn->zone_limit_seq) {
@@ -451,12 +452,10 @@  static void
 conn_clean(struct conntrack *ct, struct conn *conn)
     OVS_REQUIRES(ct->ct_lock)
 {
-    ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT);
-
     conn_clean_cmn(ct, conn);
-    if (conn->nat_conn) {
-        uint32_t hash = conn_key_hash(&conn->nat_conn->key, ct->hash_basis);
-        cmap_remove(&ct->conns, &conn->nat_conn->cm_node, hash);
+    if (conn->conn_flags & CONN_FLAG_NAT_MASK) {
+        uint32_t hash = conn_key_hash(&conn->rev_key, ct->hash_basis);
+        cmap_remove(&ct->conns, &conn->rev.cm_node, hash);
     }
     ovs_list_remove(&conn->exp_node);
     conn->cleaned = true;
@@ -464,19 +463,6 @@  conn_clean(struct conntrack *ct, struct conn *conn)
     atomic_count_dec(&ct->n_conn);
 }
 
-static void
-conn_clean_one(struct conntrack *ct, struct conn *conn)
-    OVS_REQUIRES(ct->ct_lock)
-{
-    conn_clean_cmn(ct, conn);
-    if (conn->conn_type == CT_CONN_TYPE_DEFAULT) {
-        ovs_list_remove(&conn->exp_node);
-        conn->cleaned = true;
-        atomic_count_dec(&ct->n_conn);
-    }
-    ovsrcu_postpone(delete_conn_one, conn);
-}
-
 /* Destroys the connection tracker 'ct' and frees all the allocated memory.
  * The caller of this function must already have shut down packet input
  * and PMD threads (which would have been quiesced).  */
@@ -488,9 +474,14 @@  conntrack_destroy(struct conntrack *ct)
     pthread_join(ct->clean_thread, NULL);
     latch_destroy(&ct->clean_thread_exit);
 
+    struct conn_dir *conndir;
     ovs_mutex_lock(&ct->ct_lock);
-    CMAP_FOR_EACH (conn, cm_node, &ct->conns) {
-        conn_clean_one(ct, conn);
+    CMAP_FOR_EACH (conndir, cm_node, &ct->conns) {
+        if (!conndir->orig) {
+            continue;
+        }
+        conn = conn_from_conndir(conndir);
+        conn_clean(ct, conn);
     }
     cmap_destroy(&ct->conns);
 
@@ -530,9 +521,11 @@  conn_key_lookup(struct conntrack *ct, const struct conn_key *key,
                 bool *reply)
 {
     struct conn *conn;
+    struct conn_dir *conndir;
     bool found = false;
 
-    CMAP_FOR_EACH_WITH_HASH (conn, cm_node, hash, &ct->conns) {
+    CMAP_FOR_EACH_WITH_HASH (conndir, cm_node, hash, &ct->conns) {
+        conn = conn_from_conndir(conndir);
         if (!conn_key_cmp(&conn->key, key) && !conn_expired(conn, now)) {
             found = true;
             if (reply) {
@@ -710,7 +703,7 @@  handle_alg_ctl(struct conntrack *ct, const struct conn_lookup_ctx *ctx,
 static void
 pat_packet(struct dp_packet *pkt, const struct conn *conn)
 {
-    if (conn->nat_info->nat_action & NAT_ACTION_SRC) {
+    if (conn->conn_flags & NAT_ACTION_SRC) {
         if (conn->key.nw_proto == IPPROTO_TCP) {
             struct tcp_header *th = dp_packet_l4(pkt);
             packet_set_tcp_port(pkt, conn->rev_key.dst.port, th->tcp_dst);
@@ -718,7 +711,7 @@  pat_packet(struct dp_packet *pkt, const struct conn *conn)
             struct udp_header *uh = dp_packet_l4(pkt);
             packet_set_udp_port(pkt, conn->rev_key.dst.port, uh->udp_dst);
         }
-    } else if (conn->nat_info->nat_action & NAT_ACTION_DST) {
+    } else if (conn->conn_flags & NAT_ACTION_DST) {
         if (conn->key.nw_proto == IPPROTO_TCP) {
             struct tcp_header *th = dp_packet_l4(pkt);
             packet_set_tcp_port(pkt, th->tcp_src, conn->rev_key.src.port);
@@ -732,7 +725,7 @@  pat_packet(struct dp_packet *pkt, const struct conn *conn)
 static void
 nat_packet(struct dp_packet *pkt, const struct conn *conn, bool related)
 {
-    if (conn->nat_info->nat_action & NAT_ACTION_SRC) {
+    if (conn->conn_flags & NAT_ACTION_SRC) {
         pkt->md.ct_state |= CS_SRC_NAT;
         if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
             struct ip_header *nh = dp_packet_l3(pkt);
@@ -747,7 +740,7 @@  nat_packet(struct dp_packet *pkt, const struct conn *conn, bool related)
         if (!related) {
             pat_packet(pkt, conn);
         }
-    } else if (conn->nat_info->nat_action & NAT_ACTION_DST) {
+    } else if (conn->conn_flags & NAT_ACTION_DST) {
         pkt->md.ct_state |= CS_DST_NAT;
         if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
             struct ip_header *nh = dp_packet_l3(pkt);
@@ -768,7 +761,7 @@  nat_packet(struct dp_packet *pkt, const struct conn *conn, bool related)
 static void
 un_pat_packet(struct dp_packet *pkt, const struct conn *conn)
 {
-    if (conn->nat_info->nat_action & NAT_ACTION_SRC) {
+    if (conn->conn_flags & NAT_ACTION_SRC) {
         if (conn->key.nw_proto == IPPROTO_TCP) {
             struct tcp_header *th = dp_packet_l4(pkt);
             packet_set_tcp_port(pkt, th->tcp_src, conn->key.src.port);
@@ -776,7 +769,7 @@  un_pat_packet(struct dp_packet *pkt, const struct conn *conn)
             struct udp_header *uh = dp_packet_l4(pkt);
             packet_set_udp_port(pkt, uh->udp_src, conn->key.src.port);
         }
-    } else if (conn->nat_info->nat_action & NAT_ACTION_DST) {
+    } else if (conn->conn_flags & NAT_ACTION_DST) {
         if (conn->key.nw_proto == IPPROTO_TCP) {
             struct tcp_header *th = dp_packet_l4(pkt);
             packet_set_tcp_port(pkt, conn->key.dst.port, th->tcp_dst);
@@ -790,7 +783,7 @@  un_pat_packet(struct dp_packet *pkt, const struct conn *conn)
 static void
 reverse_pat_packet(struct dp_packet *pkt, const struct conn *conn)
 {
-    if (conn->nat_info->nat_action & NAT_ACTION_SRC) {
+    if (conn->conn_flags & NAT_ACTION_SRC) {
         if (conn->key.nw_proto == IPPROTO_TCP) {
             struct tcp_header *th_in = dp_packet_l4(pkt);
             packet_set_tcp_port(pkt, conn->key.src.port,
@@ -800,7 +793,7 @@  reverse_pat_packet(struct dp_packet *pkt, const struct conn *conn)
             packet_set_udp_port(pkt, conn->key.src.port,
                                 uh_in->udp_dst);
         }
-    } else if (conn->nat_info->nat_action & NAT_ACTION_DST) {
+    } else if (conn->conn_flags & NAT_ACTION_DST) {
         if (conn->key.nw_proto == IPPROTO_TCP) {
             struct tcp_header *th_in = dp_packet_l4(pkt);
             packet_set_tcp_port(pkt, th_in->tcp_src,
@@ -834,10 +827,10 @@  reverse_nat_packet(struct dp_packet *pkt, const struct conn *conn)
         pkt->l3_ofs += (char *) inner_l3 - (char *) nh;
         pkt->l4_ofs += inner_l4 - (char *) icmp;
 
-        if (conn->nat_info->nat_action & NAT_ACTION_SRC) {
+        if (conn->conn_flags & NAT_ACTION_SRC) {
             packet_set_ipv4_addr(pkt, &inner_l3->ip_src,
                                  conn->key.src.addr.ipv4);
-        } else if (conn->nat_info->nat_action & NAT_ACTION_DST) {
+        } else if (conn->conn_flags & NAT_ACTION_DST) {
             packet_set_ipv4_addr(pkt, &inner_l3->ip_dst,
                                  conn->key.dst.addr.ipv4);
         }
@@ -858,11 +851,11 @@  reverse_nat_packet(struct dp_packet *pkt, const struct conn *conn)
         pkt->l3_ofs += (char *) inner_l3_6 - (char *) nh6;
         pkt->l4_ofs += inner_l4 - (char *) icmp6;
 
-        if (conn->nat_info->nat_action & NAT_ACTION_SRC) {
+        if (conn->conn_flags & NAT_ACTION_SRC) {
             packet_set_ipv6_addr(pkt, conn->key.nw_proto,
                                  inner_l3_6->ip6_src.be32,
                                  &conn->key.src.addr.ipv6, true);
-        } else if (conn->nat_info->nat_action & NAT_ACTION_DST) {
+        } else if (conn->conn_flags & NAT_ACTION_DST) {
             packet_set_ipv6_addr(pkt, conn->key.nw_proto,
                                  inner_l3_6->ip6_dst.be32,
                                  &conn->key.dst.addr.ipv6, true);
@@ -877,10 +870,9 @@  reverse_nat_packet(struct dp_packet *pkt, const struct conn *conn)
 }
 
 static void
-un_nat_packet(struct dp_packet *pkt, const struct conn *conn,
-              bool related)
+un_nat_packet(struct dp_packet *pkt, const struct conn *conn, bool related)
 {
-    if (conn->nat_info->nat_action & NAT_ACTION_SRC) {
+    if (conn->conn_flags & NAT_ACTION_SRC) {
         pkt->md.ct_state |= CS_DST_NAT;
         if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
             struct ip_header *nh = dp_packet_l3(pkt);
@@ -898,7 +890,7 @@  un_nat_packet(struct dp_packet *pkt, const struct conn *conn,
         } else {
             un_pat_packet(pkt, conn);
         }
-    } else if (conn->nat_info->nat_action & NAT_ACTION_DST) {
+    } else if (conn->conn_flags & NAT_ACTION_DST) {
         pkt->md.ct_state |= CS_SRC_NAT;
         if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
             struct ip_header *nh = dp_packet_l3(pkt);
@@ -964,7 +956,6 @@  conn_not_found(struct conntrack *ct, struct dp_packet *pkt,
     OVS_REQUIRES(ct->ct_lock)
 {
     struct conn *nc = NULL;
-    struct conn *nat_conn = NULL;
 
     if (!valid_new(pkt, &ctx->key)) {
         pkt->md.ct_state = CS_INVALID;
@@ -995,6 +986,8 @@  conn_not_found(struct conntrack *ct, struct dp_packet *pkt,
         memcpy(&nc->key, &ctx->key, sizeof nc->key);
         memcpy(&nc->rev_key, &nc->key, sizeof nc->rev_key);
         conn_key_reverse(&nc->rev_key);
+        nc->orig.orig = true;
+        nc->rev.orig = !nc->orig.orig;
 
         if (ct_verify_helper(helper, ct_alg_ctl)) {
             nc->alg = nullable_xstrdup(helper);
@@ -1008,45 +1001,29 @@  conn_not_found(struct conntrack *ct, struct dp_packet *pkt,
         }
 
         if (nat_action_info) {
-            nc->nat_info = xmemdup(nat_action_info, sizeof *nc->nat_info);
-            nat_conn = xzalloc(sizeof *nat_conn);
-
             if (alg_exp) {
                 if (alg_exp->nat_rpl_dst) {
                     nc->rev_key.dst.addr = alg_exp->alg_nat_repl_addr;
-                    nc->nat_info->nat_action = NAT_ACTION_SRC;
+                    nc->conn_flags |= NAT_ACTION_SRC;
                 } else {
                     nc->rev_key.src.addr = alg_exp->alg_nat_repl_addr;
-                    nc->nat_info->nat_action = NAT_ACTION_DST;
+                    nc->conn_flags |= NAT_ACTION_DST;
                 }
             } else {
-                memcpy(nat_conn, nc, sizeof *nat_conn);
-                bool nat_res = nat_select_range_tuple(ct, nc, nat_conn);
+                bool nat_res = nat_select_range_tuple(ct, nc, nat_action_info);
 
                 if (!nat_res) {
                     goto nat_res_exhaustion;
                 }
-
-                /* Update nc with nat adjustments made to nat_conn by
-                 * nat_select_range_tuple(). */
-                memcpy(nc, nat_conn, sizeof *nc);
             }
 
             nat_packet(pkt, nc, ctx->icmp_related);
-            memcpy(&nat_conn->key, &nc->rev_key, sizeof nat_conn->key);
-            memcpy(&nat_conn->rev_key, &nc->key, sizeof nat_conn->rev_key);
-            nat_conn->conn_type = CT_CONN_TYPE_UN_NAT;
-            nat_conn->nat_info = NULL;
-            nat_conn->alg = NULL;
-            nat_conn->nat_conn = NULL;
-            uint32_t nat_hash = conn_key_hash(&nat_conn->key, ct->hash_basis);
-            cmap_insert(&ct->conns, &nat_conn->cm_node, nat_hash);
+            uint32_t nat_hash = conn_key_hash(&nc->rev_key, ct->hash_basis);
+            cmap_insert(&ct->conns, &nc->rev.cm_node, nat_hash);
         }
 
-        nc->nat_conn = nat_conn;
         ovs_mutex_init_adaptive(&nc->lock);
-        nc->conn_type = CT_CONN_TYPE_DEFAULT;
-        cmap_insert(&ct->conns, &nc->cm_node, ctx->hash);
+        cmap_insert(&ct->conns, &nc->orig.cm_node, ctx->hash);
         atomic_count_inc(&ct->n_conn);
         ctx->conn = nc; /* For completeness. */
         if (zl) {
@@ -1066,7 +1043,6 @@  conn_not_found(struct conntrack *ct, struct dp_packet *pkt,
      * firewall rules or a separate firewall.  Also using zone partitioning
      * can limit DoS impact. */
 nat_res_exhaustion:
-    free(nat_conn);
     ovs_list_remove(&nc->exp_node);
     delete_conn_cmn(nc);
     static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5);
@@ -1080,7 +1056,6 @@  conn_update_state(struct conntrack *ct, struct dp_packet *pkt,
                   struct conn_lookup_ctx *ctx, struct conn *conn,
                   long long now)
 {
-    ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT);
     bool create_new_conn = false;
 
     if (ctx->icmp_related) {
@@ -1128,7 +1103,7 @@  static void
 handle_nat(struct dp_packet *pkt, struct conn *conn,
            uint16_t zone, bool reply, bool related)
 {
-    if (conn->nat_info &&
+    if (conn->conn_flags & CONN_FLAG_NAT_MASK &&
         (!(pkt->md.ct_state & (CS_SRC_NAT | CS_DST_NAT)) ||
           (pkt->md.ct_state & (CS_SRC_NAT | CS_DST_NAT) &&
            zone != pkt->md.ct_zone))) {
@@ -1301,25 +1276,6 @@  process_one(struct conntrack *ct, struct dp_packet *pkt,
         conn = NULL;
     }
 
-    if (OVS_LIKELY(conn)) {
-        if (conn->conn_type == CT_CONN_TYPE_UN_NAT) {
-
-            ctx->reply = true;
-            struct conn *rev_conn = conn;  /* Save for debugging. */
-            uint32_t hash = conn_key_hash(&conn->rev_key, ct->hash_basis);
-            conn_key_lookup(ct, &ctx->key, hash, now, &conn, &ctx->reply);
-
-            if (!conn) {
-                pkt->md.ct_state |= CS_INVALID;
-                write_ct_md(pkt, zone, NULL, NULL, NULL);
-                char *log_msg = xasprintf("Missing parent conn %p", rev_conn);
-                ct_print_conn_info(rev_conn, log_msg, VLL_INFO, true, true);
-                free(log_msg);
-                return;
-            }
-        }
-    }
-
     enum ct_alg_ctl_type ct_alg_ctl = get_alg_ctl_type(pkt, tp_src, tp_dst,
                                                        helper);
 
@@ -2114,12 +2070,13 @@  conn_key_reverse(struct conn_key *key)
 }
 
 static uint32_t
-nat_ipv6_addrs_delta(struct in6_addr *ipv6_min, struct in6_addr *ipv6_max)
+nat_ipv6_addrs_delta(const struct in6_addr *ipv6_min,
+                     const struct in6_addr *ipv6_max)
 {
-    uint8_t *ipv6_min_hi = &ipv6_min->s6_addr[0];
-    uint8_t *ipv6_min_lo = &ipv6_min->s6_addr[0] +  sizeof(uint64_t);
-    uint8_t *ipv6_max_hi = &ipv6_max->s6_addr[0];
-    uint8_t *ipv6_max_lo = &ipv6_max->s6_addr[0] + sizeof(uint64_t);
+    const uint8_t *ipv6_min_hi = &ipv6_min->s6_addr[0];
+    const uint8_t *ipv6_min_lo = &ipv6_min->s6_addr[0] +  sizeof(uint64_t);
+    const uint8_t *ipv6_max_hi = &ipv6_max->s6_addr[0];
+    const uint8_t *ipv6_max_lo = &ipv6_max->s6_addr[0] + sizeof(uint64_t);
 
     ovs_be64 addr6_64_min_hi;
     ovs_be64 addr6_64_min_lo;
@@ -2180,15 +2137,17 @@  nat_ipv6_addr_increment(struct in6_addr *ipv6, uint32_t increment)
 }
 
 static uint32_t
-nat_range_hash(const struct conn *conn, uint32_t basis)
+nat_range_hash(const struct conn *conn,
+               const struct nat_action_info_t *nat_info,
+               uint32_t basis)
 {
     uint32_t hash = basis;
 
-    hash = ct_addr_hash_add(hash, &conn->nat_info->min_addr);
-    hash = ct_addr_hash_add(hash, &conn->nat_info->max_addr);
+    hash = ct_addr_hash_add(hash, &nat_info->min_addr);
+    hash = ct_addr_hash_add(hash, &nat_info->max_addr);
     hash = hash_add(hash,
-                    (conn->nat_info->max_port << 16)
-                    | conn->nat_info->min_port);
+                    (nat_info->max_port << 16)
+                    | nat_info->min_port);
     hash = ct_endpoint_hash_add(hash, &conn->key.src);
     hash = ct_endpoint_hash_add(hash, &conn->key.dst);
     hash = hash_add(hash, (OVS_FORCE uint32_t) conn->key.dl_type);
@@ -2202,8 +2161,8 @@  nat_range_hash(const struct conn *conn, uint32_t basis)
 }
 
 static bool
-nat_select_range_tuple(struct conntrack *ct, const struct conn *conn,
-                       struct conn *nat_conn)
+nat_select_range_tuple(struct conntrack *ct, struct conn *conn,
+                       const struct nat_action_info_t *nat_info)
 {
     enum { MIN_NAT_EPHEMERAL_PORT = 1024,
            MAX_NAT_EPHEMERAL_PORT = 65535 };
@@ -2211,25 +2170,26 @@  nat_select_range_tuple(struct conntrack *ct, const struct conn *conn,
     uint16_t min_port;
     uint16_t max_port;
     uint16_t first_port;
-    uint32_t hash = nat_range_hash(conn, ct->hash_basis);
+    uint32_t hash = nat_range_hash(conn, nat_info, ct->hash_basis);
 
-    if ((conn->nat_info->nat_action & NAT_ACTION_SRC) &&
-        (!(conn->nat_info->nat_action & NAT_ACTION_SRC_PORT))) {
+    if ((nat_info->nat_action & NAT_ACTION_SRC) &&
+        (!(nat_info->nat_action & NAT_ACTION_SRC_PORT))) {
         min_port = ntohs(conn->key.src.port);
         max_port = ntohs(conn->key.src.port);
         first_port = min_port;
-    } else if ((conn->nat_info->nat_action & NAT_ACTION_DST) &&
-               (!(conn->nat_info->nat_action & NAT_ACTION_DST_PORT))) {
+    } else if ((nat_info->nat_action & NAT_ACTION_DST) &&
+               (!(nat_info->nat_action & NAT_ACTION_DST_PORT))) {
         min_port = ntohs(conn->key.dst.port);
         max_port = ntohs(conn->key.dst.port);
         first_port = min_port;
     } else {
-        uint16_t deltap = conn->nat_info->max_port - conn->nat_info->min_port;
+        uint16_t deltap = nat_info->max_port - nat_info->min_port;
         uint32_t port_index = hash % (deltap + 1);
-        first_port = conn->nat_info->min_port + port_index;
-        min_port = conn->nat_info->min_port;
-        max_port = conn->nat_info->max_port;
+        first_port = nat_info->min_port + port_index;
+        min_port = nat_info->min_port;
+        max_port = nat_info->max_port;
     }
+    conn->conn_flags |= nat_info->nat_action;
 
     uint32_t deltaa = 0;
     uint32_t address_index;
@@ -2237,25 +2197,25 @@  nat_select_range_tuple(struct conntrack *ct, const struct conn *conn,
     memset(&ct_addr, 0, sizeof ct_addr);
     union ct_addr max_ct_addr;
     memset(&max_ct_addr, 0, sizeof max_ct_addr);
-    max_ct_addr = conn->nat_info->max_addr;
+    max_ct_addr = nat_info->max_addr;
 
     if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
-        deltaa = ntohl(conn->nat_info->max_addr.ipv4) -
-                 ntohl(conn->nat_info->min_addr.ipv4);
+        deltaa = ntohl(nat_info->max_addr.ipv4) -
+                 ntohl(nat_info->min_addr.ipv4);
         address_index = hash % (deltaa + 1);
         ct_addr.ipv4 = htonl(
-            ntohl(conn->nat_info->min_addr.ipv4) + address_index);
+            ntohl(nat_info->min_addr.ipv4) + address_index);
     } else {
-        deltaa = nat_ipv6_addrs_delta(&conn->nat_info->min_addr.ipv6,
-                                      &conn->nat_info->max_addr.ipv6);
+        deltaa = nat_ipv6_addrs_delta(&nat_info->min_addr.ipv6,
+                                      &nat_info->max_addr.ipv6);
         /* deltaa must be within 32 bits for full hash coverage. A 64 or
          * 128 bit hash is unnecessary and hence not used here. Most code
          * is kept common with V4; nat_ipv6_addrs_delta() will do the
          * enforcement via max_ct_addr. */
-        max_ct_addr = conn->nat_info->min_addr;
+        max_ct_addr = nat_info->min_addr;
         nat_ipv6_addr_increment(&max_ct_addr.ipv6, deltaa);
         address_index = hash % (deltaa + 1);
-        ct_addr.ipv6 = conn->nat_info->min_addr.ipv6;
+        ct_addr.ipv6 = nat_info->min_addr.ipv6;
         nat_ipv6_addr_increment(&ct_addr.ipv6, address_index);
     }
 
@@ -2263,27 +2223,27 @@  nat_select_range_tuple(struct conntrack *ct, const struct conn *conn,
     bool all_ports_tried = false;
     /* For DNAT or for specified port ranges, we don't use ephemeral ports. */
     bool ephemeral_ports_tried
-        = conn->nat_info->nat_action & NAT_ACTION_DST ||
-              conn->nat_info->nat_action & NAT_ACTION_SRC_PORT
+        = nat_info->nat_action & NAT_ACTION_DST ||
+              nat_info->nat_action & NAT_ACTION_SRC_PORT
           ? true : false;
     union ct_addr first_addr = ct_addr;
     bool pat_enabled = conn->key.nw_proto != IPPROTO_ICMP &&
                        conn->key.nw_proto != IPPROTO_ICMPV6;
 
     while (true) {
-        if (conn->nat_info->nat_action & NAT_ACTION_SRC) {
-            nat_conn->rev_key.dst.addr = ct_addr;
+        if (nat_info->nat_action & NAT_ACTION_SRC) {
+            conn->rev_key.dst.addr = ct_addr;
             if (pat_enabled) {
-                nat_conn->rev_key.dst.port = htons(port);
+                conn->rev_key.dst.port = htons(port);
             }
         } else {
-            nat_conn->rev_key.src.addr = ct_addr;
+            conn->rev_key.src.addr = ct_addr;
             if (pat_enabled) {
-                nat_conn->rev_key.src.port = htons(port);
+                conn->rev_key.src.port = htons(port);
             }
         }
 
-        bool found = conn_lookup(ct, &nat_conn->rev_key, time_msec(), NULL,
+        bool found = conn_lookup(ct, &conn->rev_key, time_msec(), NULL,
                                  NULL);
         if (!found) {
             return true;
@@ -2306,12 +2266,12 @@  nat_select_range_tuple(struct conntrack *ct, const struct conn *conn,
                     nat_ipv6_addr_increment(&ct_addr.ipv6, 1);
                 }
             } else {
-                ct_addr = conn->nat_info->min_addr;
+                ct_addr = nat_info->min_addr;
             }
             if (!memcmp(&ct_addr, &first_addr, sizeof ct_addr)) {
                 if (pat_enabled && !ephemeral_ports_tried) {
                     ephemeral_ports_tried = true;
-                    ct_addr = conn->nat_info->min_addr;
+                    ct_addr = nat_info->min_addr;
                     first_addr = ct_addr;
                     min_port = MIN_NAT_EPHEMERAL_PORT;
                     max_port = MAX_NAT_EPHEMERAL_PORT;
@@ -2324,6 +2284,7 @@  nat_select_range_tuple(struct conntrack *ct, const struct conn *conn,
             all_ports_tried = false;
         }
     }
+    conn->conn_flags &= ~CONN_FLAG_NAT_MASK;
     return false;
 }
 
@@ -2342,13 +2303,10 @@  conn_update(struct conntrack *ct, struct conn *conn, struct dp_packet *pkt,
 static bool
 conn_expired(struct conn *conn, long long now)
 {
-    if (conn->conn_type == CT_CONN_TYPE_DEFAULT) {
-        ovs_mutex_lock(&conn->lock);
-        bool expired = now >= conn->expiration ? true : false;
-        ovs_mutex_unlock(&conn->lock);
-        return expired;
-    }
-    return false;
+    ovs_mutex_lock(&conn->lock);
+    bool expired = now >= conn->expiration ? true : false;
+    ovs_mutex_unlock(&conn->lock);
+    return expired;
 }
 
 static bool
@@ -2367,7 +2325,6 @@  new_conn(struct conntrack *ct, struct dp_packet *pkt, struct conn_key *key,
 static void
 delete_conn_cmn(struct conn *conn)
 {
-    free(conn->nat_info);
     free(conn->alg);
     free(conn);
 }
@@ -2375,22 +2332,10 @@  delete_conn_cmn(struct conn *conn)
 static void
 delete_conn(struct conn *conn)
 {
-    ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT);
     ovs_mutex_destroy(&conn->lock);
-    free(conn->nat_conn);
     delete_conn_cmn(conn);
 }
 
-/* Only used by conn_clean_one(). */
-static void
-delete_conn_one(struct conn *conn)
-{
-    if (conn->conn_type == CT_CONN_TYPE_DEFAULT) {
-        ovs_mutex_destroy(&conn->lock);
-    }
-    delete_conn_cmn(conn);
-}
-
 /* Convert a conntrack address 'a' into an IP address 'b' based on 'dl_type'.
  *
  * Note that 'dl_type' should be either "ETH_TYPE_IP" or "ETH_TYPE_IPv6"
@@ -2539,10 +2484,14 @@  conntrack_dump_next(struct conntrack_dump *dump, struct ct_dpif_entry *entry)
         if (!cm_node) {
             break;
         }
-        struct conn *conn;
-        INIT_CONTAINER(conn, cm_node, cm_node);
-        if ((!dump->filter_zone || conn->key.zone == dump->zone) &&
-            (conn->conn_type != CT_CONN_TYPE_UN_NAT)) {
+        struct conn_dir *conndir;
+        struct conn* conn;
+        INIT_CONTAINER(conndir, cm_node, cm_node);
+        if (!conndir->orig) {
+            continue;
+        }
+        conn = conn_from_conndir(conndir);
+        if ((!dump->filter_zone || conn->key.zone == dump->zone)) {
             conn_to_ct_dpif_entry(conn, entry, now);
             return 0;
         }
@@ -2561,11 +2510,16 @@  int
 conntrack_flush(struct conntrack *ct, const uint16_t *zone)
 {
     struct conn *conn;
+    struct conn_dir *conndir;
 
     ovs_mutex_lock(&ct->ct_lock);
-    CMAP_FOR_EACH (conn, cm_node, &ct->conns) {
+    CMAP_FOR_EACH (conndir, cm_node, &ct->conns) {
+        if (!conndir->orig) {
+            continue;
+        }
+        conn = conn_from_conndir(conndir);
         if (!zone || *zone == conn->key.zone) {
-            conn_clean_one(ct, conn);
+            conn_clean(ct, conn);
         }
     }
     ovs_mutex_unlock(&ct->ct_lock);
@@ -2586,7 +2540,7 @@  conntrack_flush_tuple(struct conntrack *ct, const struct ct_dpif_tuple *tuple,
     ovs_mutex_lock(&ct->ct_lock);
     conn_lookup(ct, &key, time_msec(), &conn, NULL);
 
-    if (conn && conn->conn_type == CT_CONN_TYPE_DEFAULT) {
+    if (conn) {
         conn_clean(ct, conn);
     } else {
         VLOG_WARN("Must flush tuple using the original pre-NATed tuple");
@@ -2744,8 +2698,7 @@  expectation_create(struct conntrack *ct, ovs_be16 dst_port,
         alg_exp_node->nat_rpl_dst = true;
         if (skip_nat) {
             alg_nat_repl_addr = dst_addr;
-        } else if (parent_conn->nat_info &&
-                   parent_conn->nat_info->nat_action & NAT_ACTION_DST) {
+        } else if (parent_conn->conn_flags & NAT_ACTION_DST) {
             alg_nat_repl_addr = parent_conn->rev_key.src.addr;
             alg_exp_node->nat_rpl_dst = false;
         } else {
@@ -2757,8 +2710,7 @@  expectation_create(struct conntrack *ct, ovs_be16 dst_port,
         alg_exp_node->nat_rpl_dst = false;
         if (skip_nat) {
             alg_nat_repl_addr = src_addr;
-        } else if (parent_conn->nat_info &&
-                   parent_conn->nat_info->nat_action & NAT_ACTION_DST) {
+        } else if (parent_conn->conn_flags & NAT_ACTION_DST) {
             alg_nat_repl_addr = parent_conn->key.dst.addr;
             alg_exp_node->nat_rpl_dst = true;
         } else {