[ovs-dev,v2,5/9] ofproto-dpif: Consume CT_Zone, and CT_Timeout_Policy tables
diff mbox series

Message ID 1564697253-37992-6-git-send-email-yihung.wei@gmail.com
State New
Headers show
Series
  • Support zone-based conntrack timeout policy
Related show

Commit Message

Yi-Hung Wei Aug. 1, 2019, 10:07 p.m. UTC
This patch consumes the CT_Zone and CT_Timeout_Policy tables, maintains
the zone-based timeout policy in the vswitchd. Whenever there is a
database change, vswitchd will read the datapath, CT_Zone, and
CT_Timeout_Policy tables from ovsdb to detect if the is any timeout
policy changes.

If a new timeout policy is added, it stores the information in
per datapath type internal datapath structure in struct dpif-backer,
and pushes down the conntrack timeout policy into the datapath via dpif
interface.

If a timeout policy is no longer used, vswitchd may not be able to
remove it from datapath immediately since the datapath flow may still
reference that. Instead, we keep an timeout policy kill list, that
vswitchd will goes back to the list regularly and try to kill the
unused timeout policies.

Signed-off-by: Yi-Hung Wei <yihung.wei@gmail.com>
---
 ofproto/ofproto-dpif.c     | 266 +++++++++++++++++++++++++++++++++++++++++++++
 ofproto/ofproto-dpif.h     |  10 ++
 ofproto/ofproto-provider.h |   8 ++
 ofproto/ofproto.c          |  20 ++++
 ofproto/ofproto.h          |   4 +
 vswitchd/bridge.c          |  41 +++++++
 6 files changed, 349 insertions(+)

Comments

Darrell Ball Aug. 6, 2019, 3:07 a.m. UTC | #1
Thanks for the patch

comments inline

On Thu, Aug 1, 2019 at 3:10 PM Yi-Hung Wei <yihung.wei@gmail.com> wrote:

> This patch consumes the CT_Zone and CT_Timeout_Policy tables, maintains
> the zone-based timeout policy in the vswitchd. Whenever there is a
> database change, vswitchd will read the datapath, CT_Zone, and
> CT_Timeout_Policy tables from ovsdb to detect if the is any timeout
> policy changes.
>
> If a new timeout policy is added, it stores the information in
> per datapath type internal datapath structure in struct dpif-backer,
> and pushes down the conntrack timeout policy into the datapath via dpif
> interface.
>
> If a timeout policy is no longer used, vswitchd may not be able to
> remove it from datapath immediately since the datapath flow may still
> reference that. Instead, we keep an timeout policy kill list, that
> vswitchd will goes back to the list regularly and try to kill the
> unused timeout policies.
>
> Signed-off-by: Yi-Hung Wei <yihung.wei@gmail.com>
> ---
>  ofproto/ofproto-dpif.c     | 266
> +++++++++++++++++++++++++++++++++++++++++++++
>  ofproto/ofproto-dpif.h     |  10 ++
>  ofproto/ofproto-provider.h |   8 ++
>  ofproto/ofproto.c          |  20 ++++
>  ofproto/ofproto.h          |   4 +
>  vswitchd/bridge.c          |  41 +++++++
>  6 files changed, 349 insertions(+)
>
> diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
> index 751535249e21..6336494e0bc8 100644
> --- a/ofproto/ofproto-dpif.c
> +++ b/ofproto/ofproto-dpif.c
> @@ -156,6 +156,25 @@ struct ofport_dpif {
>      size_t n_qdscp;
>  };
>
> +struct ct_timeout_policy {
> +    struct uuid uuid;
> +    unsigned int last_used_seqno;
> +    struct ct_dpif_timeout_policy cdtp;
> +    struct cmap_node node;          /* Element in struct dpif_backer's
> +                                     * "ct_tps" cmap. */
>


This looks like a layering violation
should be in dpif-netlink or netlink-conntrack for kernel side


> +    struct ovs_list list_node;      /* Element in struct dpif_backer's
> +                                     * "ct_tp_kill_list" list. */





> +};
> +
> +struct ct_zone {
> +    uint16_t id;
> +    unsigned int last_used_seqno;
> +    struct uuid tp_uuid;            /* uuid that identifies a timeout
> policy in
> +                                     * struct dpif_backer's "ct_tps"
> cmap. */
> +    struct cmap_node node;          /* Element in struct dpif_backer's
> +                                     * "ct_zones" cmap. */
> +};
> +
>  static odp_port_t ofp_port_to_odp_port(const struct ofproto_dpif *,
>                                         ofp_port_t);
>
> @@ -196,6 +215,8 @@ static struct hmap all_ofproto_dpifs_by_uuid =
>
>  static bool ofproto_use_tnl_push_pop = true;
>  static void ofproto_unixctl_init(void);
> +static void destroy_ct_zone_timeout_policy(struct dpif_backer *backer);
> +static void init_ct_zone_timeout_policy(struct dpif_backer *backer);
>
>  static inline struct ofproto_dpif *
>  ofproto_dpif_cast(const struct ofproto *ofproto)
> @@ -683,6 +704,7 @@ close_dpif_backer(struct dpif_backer *backer, bool del)
>      }
>      dpif_close(backer->dpif);
>      id_pool_destroy(backer->meter_ids);
> +    destroy_ct_zone_timeout_policy(backer);
>      free(backer);
>  }
>
> @@ -694,6 +716,8 @@ struct odp_garbage {
>
>  static void check_support(struct dpif_backer *backer);
>
> +#define MAX_TIMEOUT_POLICY_ID UINT32_MAX
> +
>  static int
>  open_dpif_backer(const char *type, struct dpif_backer **backerp)
>  {
> @@ -811,6 +835,8 @@ open_dpif_backer(const char *type, struct dpif_backer
> **backerp)
>          backer->meter_ids = NULL;
>      }
>
> +    init_ct_zone_timeout_policy(backer);
> +
>      /* Make a pristine snapshot of 'support' into 'boottime_support'.
>       * 'boottime_support' can be checked to prevent 'support' to be
> changed
>       * beyond the datapath capabilities. In case 'support' is changed by
> @@ -5086,6 +5112,244 @@ ct_flush(const struct ofproto *ofproto_, const
> uint16_t *zone)
>      ct_dpif_flush(ofproto->backer->dpif, zone, NULL);
>  }
>
> +static struct ct_zone *
> +ct_zone_lookup(struct cmap *ct_zones, uint16_t zone_id)
> +{
> +    struct ct_zone *zone;
> +
> +    CMAP_FOR_EACH_WITH_HASH (zone, node, hash_int(zone_id, 0), ct_zones) {
> +        if (zone->id == zone_id) {
> +            return zone;
> +        }
> +    }
> +    return NULL;
> +}
> +
> +static struct ct_zone *
> +ct_zone_alloc(uint16_t zone_id)
> +{
> +    struct ct_zone *zone;
> +
> +    zone = xzalloc(sizeof *zone);
> +    zone->id = zone_id;
> +
> +    return zone;
> +}
> +
> +static void
> +ct_zone_remove_and_destroy(struct dpif_backer *backer, struct ct_zone
> *zone)
> +{
> +    cmap_remove(&backer->ct_zones, &zone->node, hash_int(zone->id, 0));
> +    ovsrcu_postpone(free, zone);
> +}
> +
> +static struct ct_timeout_policy *
> +ct_timeout_policy_lookup(struct cmap *ct_tps, struct uuid *uuid)
> +{
> +    struct ct_timeout_policy *tp;
> +
> +    CMAP_FOR_EACH_WITH_HASH (tp, node, uuid_hash(uuid), ct_tps) {
> +        if (uuid_equals(&tp->uuid, uuid)) {
> +            return tp;
> +        }
> +    }
> +    return NULL;
> +}
> +
> +static struct ct_timeout_policy *
> +ct_timeout_policy_alloc(struct ovsrec_ct_timeout_policy *tp_cfg,
> +                        struct id_pool *tp_ids)
> +{
> +    struct ct_timeout_policy *tp;
> +    size_t i;
> +
> +    tp = xzalloc(sizeof *tp);
> +    tp->uuid = tp_cfg->header_.uuid;
> +    for (i = 0; i < tp_cfg->n_timeouts; i++) {
> +        ct_dpif_set_timeout_policy_attr_by_name(&tp->cdtp,
> +            tp_cfg->key_timeouts[i], tp_cfg->value_timeouts[i]);
> +    }
> +    if (!id_pool_alloc_id(tp_ids, &tp->cdtp.id)) {
> +        VLOG_WARN_RL(&rl, "failed to allocate timeout policy id.");
> +        free(tp);
> +        return NULL;
> +    }
> +
> +    return tp;
> +}
> +
> +static void
> +ct_timeout_policy_destroy(struct dpif_backer *backer,
> +                          struct ct_timeout_policy *tp)
> +{
> +    id_pool_free_id(backer->timeout_policy_ids, tp->cdtp.id);
> +    ovsrcu_postpone(free, tp);
> +}
> +
> +static bool
> +ct_timeout_policy_update(struct ovsrec_ct_timeout_policy *tp_cfg,
> +                         struct ct_timeout_policy *tp)
> +{
> +    size_t i;
> +    bool changed = false;
> +
> +    for (i = 0; i < tp_cfg->n_timeouts; i++) {
> +        changed |= ct_dpif_set_timeout_policy_attr_by_name(&tp->cdtp,
> +                        tp_cfg->key_timeouts[i],
> tp_cfg->value_timeouts[i]);
> +    }
> +    return changed;
> +}
> +
> +static void
> +init_ct_zone_timeout_policy(struct dpif_backer *backer)
> +{
> +    cmap_init(&backer->ct_zones);
> +    cmap_init(&backer->ct_tps);
> +    backer->timeout_policy_ids = id_pool_create(0, MAX_TIMEOUT_POLICY_ID);
> +    ovs_list_init(&backer->ct_tp_kill_list);
> +
> +    /* Remove existing timeout policy from datapath. */
> +    struct ct_dpif_timeout_policy cdtp;
> +    struct ct_timeout_policy *tp;
> +    void *state;
> +    int err;
>

don't need 'err'



> +
> +    err = ct_dpif_timeout_policy_dump_start(backer->dpif, &state);
> +    if (err) {
> +        return;
> +    }
> +
> +    while (!(err = ct_dpif_timeout_policy_dump_next(backer->dpif, state,
> +                                                    &cdtp))) {
> +        tp = xzalloc(sizeof *tp);
> +        tp->cdtp = cdtp;
> +        id_pool_add(backer->timeout_policy_ids, cdtp.id);
> +        ovs_list_insert(&backer->ct_tp_kill_list, &tp->list_node);
> +    }
> +
> +    ct_dpif_timeout_policy_dump_done(backer->dpif, state);
> +}
> +
> +static void
> +destroy_ct_zone_timeout_policy(struct dpif_backer *backer)
> +{
> +    struct ct_timeout_policy *tp;
> +    struct ct_zone *zone;
> +
> +    CMAP_FOR_EACH (zone, node, &backer->ct_zones) {
> +        ct_zone_remove_and_destroy(backer, zone);
> +    }
> +
> +    CMAP_FOR_EACH (tp, node, &backer->ct_tps) {
> +        cmap_remove(&backer->ct_tps, &tp->node, uuid_hash(&tp->uuid));
> +        ct_timeout_policy_destroy(backer, tp);
> +    }
> +
> +    cmap_destroy(&backer->ct_zones);
> +    cmap_destroy(&backer->ct_tps);
> +    id_pool_destroy(backer->timeout_policy_ids);
> +}
> +
> +static void
> +ct_zone_timeout_policy_revalidate(struct dpif_backer *backer,
> +                                  unsigned int idl_seqno)
>

Looks like layering violation
'implementation' should be in dpif-netlink or netlink-conntrack for kernel
side



> +{
> +    struct ct_timeout_policy *tp;
> +    struct ct_zone *zone;
> +
> +    CMAP_FOR_EACH (zone, node, &backer->ct_zones) {
> +        if (zone->last_used_seqno != idl_seqno) {
> +            ct_zone_remove_and_destroy(backer, zone);
> +        }
> +    }
> +
> +    CMAP_FOR_EACH (tp, node, &backer->ct_tps) {
> +        if (tp->last_used_seqno != idl_seqno) {
> +            /* Even timeout policy is not referenced by any ct_zone in the
> +             * database, vswitchd may not be able to delete it right away
> +             * since the datapath flow may still reference to the timeout
> +             * policy. Move the timeout policy to ct_tp_kill_list and try
> to
> +             * delete it when possible. */
> +            cmap_remove(&backer->ct_tps, &tp->node, uuid_hash(&tp->uuid));
> +            ovs_list_push_back(&backer->ct_tp_kill_list, &tp->list_node);
> +        }
> +    }
> +}
> +
> +static void
> +ct_zone_timeout_policy_sweep(const struct ofproto *ofproto)
>

Looks like layering violation
implementation should be in dpif-netlink or netlink-conntrack for kernel
side



> +{
> +    struct dpif_backer *backer = ofproto_dpif_cast(ofproto)->backer;
> +    struct ct_timeout_policy *tp;
> +    int err;
> +
> +    if (!ovs_list_is_empty(&backer->ct_tp_kill_list)) {
> +        LIST_FOR_EACH (tp, list_node, &backer->ct_tp_kill_list) {
> +            err = ct_dpif_del_timeout_policy(backer->dpif, tp->cdtp.id);
> +            if (!err) {
> +                ovs_list_remove(&tp->list_node);
> +                ct_timeout_policy_destroy(backer, tp);
> +            } else {
> +                VLOG_INFO_RL(&rl, "Failed to delete timeout policy id = "
> +                        "%"PRIu32" %s", tp->cdtp.id, ovs_strerror(err));
> +            }
> +        }
> +    }
> +}
> +
> +static void
> +ct_zone_timeout_policy_reconfig(const struct ofproto *ofproto,
> +                                const struct ovsrec_datapath *dp_cfg,
> +                                unsigned int idl_seqno)
> +{
> +    struct dpif_backer *backer = ofproto_dpif_cast(ofproto)->backer;
> +    struct ovsrec_ct_timeout_policy *tp_cfg;
> +    struct ovsrec_ct_zone *zone_cfg;
> +    struct ct_timeout_policy *tp;
> +    struct ct_zone *zone;
> +    uint16_t zone_id;
> +    bool new_zone;
> +    size_t i;
> +
> +    for (i = 0; i < dp_cfg->n_ct_zones; i++) {
> +        /* Update ct_zone config. */
> +        zone_cfg = dp_cfg->value_ct_zones[i];
> +        zone_id = dp_cfg->key_ct_zones[i];
> +        zone = ct_zone_lookup(&backer->ct_zones, zone_id);
> +        if (!zone) {
> +            new_zone = true;
> +            zone = ct_zone_alloc(zone_id);
> +        } else {
> +            new_zone = false;
> +        }
> +        zone->last_used_seqno = idl_seqno;
> +
> +        /* Update timeout policy. */
> +        tp_cfg = zone_cfg->timeout_policy;
> +        tp = ct_timeout_policy_lookup(&backer->ct_tps,
> &tp_cfg->header_.uuid);
> +        if (!tp) {
> +            tp = ct_timeout_policy_alloc(tp_cfg,
> backer->timeout_policy_ids);
> +            if (tp) {
> +                cmap_insert(&backer->ct_tps, &tp->node,
> uuid_hash(&tp->uuid));
> +                ct_dpif_set_timeout_policy(backer->dpif, &tp->cdtp);
> +            }
> +        } else {
> +            if (ct_timeout_policy_update(tp_cfg, tp)) {
> +                ct_dpif_set_timeout_policy(backer->dpif, &tp->cdtp);
> +            }
> +        }
> +        tp->last_used_seqno = idl_seqno;
> +
> +        /* Link zone with new timeout policy. */
> +        zone->tp_uuid = tp_cfg->header_.uuid;
> +        if (new_zone) {
> +            cmap_insert(&backer->ct_zones, &zone->node, hash_int(zone_id,
> 0));
> +        }
> +    }
> +    ct_zone_timeout_policy_revalidate(backer, idl_seqno);
> +    ct_zone_timeout_policy_sweep(ofproto);
> +}
> +
>  static bool
>  set_frag_handling(struct ofproto *ofproto_,
>                    enum ofputil_frag_handling frag_handling)
> @@ -6189,4 +6453,6 @@ const struct ofproto_class ofproto_dpif_class = {
>      get_datapath_version,       /* get_datapath_version */
>      type_set_config,
>      ct_flush,                   /* ct_flush */
> +    ct_zone_timeout_policy_reconfig,
> +    ct_zone_timeout_policy_sweep,
>  };
> diff --git a/ofproto/ofproto-dpif.h b/ofproto/ofproto-dpif.h
> index cd5321eb942c..170edf0e0e70 100644
> --- a/ofproto/ofproto-dpif.h
> +++ b/ofproto/ofproto-dpif.h
> @@ -245,6 +245,16 @@ struct dpif_backer {
>      /* Meter. */
>      struct id_pool *meter_ids;     /* Datapath meter allocation. */
>
> +    /* Connection tracking */
>




Looks like layering violation
should be in dpif-netlink or netlink-conntrack for kernel side
userspace datapath could use uuid directly


> +    struct id_pool  *timeout_policy_ids;    /* Datapath timeout policy id
> +                                             * allocation. */
>





> +    struct cmap ct_zones;                   /* "struct ct_zone"s indexed
> by
> +                                             * zone id. */
> +    struct cmap ct_tps;                     /* "struct ct_timeout_policy"s
> +                                             * indexed by uuid. */
>



Looks like layering violation
should be in dpif-netlink or netlink-conntrack for kernel side

+    struct ovs_list ct_tp_kill_list;        /* a list of timeout policy to
> be
> +                                             * deleted. */
>





> +
>      /* Version string of the datapath stored in OVSDB. */
>      char *dp_version_string;
>
> diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h
> index 7907d4bfb416..41e07f0ee23e 100644
> --- a/ofproto/ofproto-provider.h
> +++ b/ofproto/ofproto-provider.h
> @@ -58,6 +58,7 @@
>  #include "tun-metadata.h"
>  #include "versions.h"
>  #include "vl-mff-map.h"
> +#include "vswitch-idl.h"
>
>  struct match;
>  struct ofputil_flow_mod;
> @@ -1872,6 +1873,13 @@ struct ofproto_class {
>      /* Flushes the connection tracking tables. If 'zone' is not NULL,
>       * only deletes connections in '*zone'. */
>      void (*ct_flush)(const struct ofproto *, const uint16_t *zone);
> +
> +    /* Reconfigures the zone-based conntrack tiemout policy based on the
> +     * ovsdb configuration. */
> +    void (*ct_zone_timeout_policy_reconfig)(const struct ofproto
> *ofproto_,
> +            const struct ovsrec_datapath *dp_cfg, unsigned int idl_seqno);
> +    /* Cleans up the to be deleted timeout policy in the pending kill
> list. */
> +    void (*ct_zone_timeout_policy_sweep)(const struct ofproto *ofproto_);
>  };
>
>  extern const struct ofproto_class ofproto_dpif_class;
> diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
> index 1d6fc00696f8..373b8a4eba0c 100644
> --- a/ofproto/ofproto.c
> +++ b/ofproto/ofproto.c
> @@ -935,6 +935,26 @@ ofproto_get_flow_restore_wait(void)
>      return flow_restore_wait;
>  }
>
> +/* Connection tracking configuration. */
> +void
> +ofproto_ct_zone_timeout_policy_reconfig(const struct ofproto *ofproto,
> +                                        const struct ovsrec_datapath
> *dp_cfg,
> +                                        unsigned int idl_seqno)
> +{
> +    if (ofproto->ofproto_class->ct_zone_timeout_policy_reconfig) {
> +        ofproto->ofproto_class->ct_zone_timeout_policy_reconfig(
> +            ofproto, dp_cfg, idl_seqno);
> +    }
> +}
> +
> +void
> +ofproto_ct_zone_timeout_policy_sweep(const struct ofproto *ofproto)
> +{
> +    if (ofproto->ofproto_class->ct_zone_timeout_policy_sweep) {
> +        ofproto->ofproto_class->ct_zone_timeout_policy_sweep(ofproto);
> +    }
> +}
> +
>
>  /* Spanning Tree Protocol (STP) configuration. */
>
> diff --git a/ofproto/ofproto.h b/ofproto/ofproto.h
> index 6e4afffa17e0..2ae42374be36 100644
> --- a/ofproto/ofproto.h
> +++ b/ofproto/ofproto.h
> @@ -52,6 +52,7 @@ struct ovs_list;
>  struct lldp_status;
>  struct aa_settings;
>  struct aa_mapping_settings;
> +struct ovsrec_datapath;
>
>  /* Needed for the lock annotations. */
>  extern struct ovs_mutex ofproto_mutex;
> @@ -362,6 +363,9 @@ int ofproto_get_stp_status(struct ofproto *, struct
> ofproto_stp_status *);
>  int ofproto_set_rstp(struct ofproto *, const struct ofproto_rstp_settings
> *);
>  int ofproto_get_rstp_status(struct ofproto *, struct ofproto_rstp_status
> *);
>  void ofproto_set_vlan_limit(int vlan_limit);
> +void ofproto_ct_zone_timeout_policy_reconfig(const struct ofproto
> *ofproto,
> +    const struct ovsrec_datapath *dp_cfg, unsigned int idl_seqno);
> +void ofproto_ct_zone_timeout_policy_sweep(const struct ofproto *ofproto);
>
>  /* Configuration of ports. */
>  void ofproto_port_unregister(struct ofproto *, ofp_port_t ofp_port);
> diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
> index 2976771aeaba..39b7de7d8182 100644
> --- a/vswitchd/bridge.c
> +++ b/vswitchd/bridge.c
> @@ -220,6 +220,11 @@ static long long int stats_timer = LLONG_MIN;
>  #define AA_REFRESH_INTERVAL (1000) /* In milliseconds. */
>  static long long int aa_refresh_timer = LLONG_MIN;
>
> +/* Each time this timer expires, the bridge tries to delete the timeout
> + * policies in the kill list. */
> +#define CT_ZONE_TIMEOUT_POLICY_TIMER_INTERVAL (5000) /* In milliseconds.
> */
> +static long long int ct_zone_timeout_policy_timer = LLONG_MIN;
> +
>  /* Whenever system interfaces are added, removed or change state, the
> bridge
>   * will be reconfigured.
>   */
> @@ -588,6 +593,40 @@ config_ofproto_types(const struct smap *other_config)
>  }
>
>  static void
> +reconfigure_conntrack_timeout_policy(const struct ovsrec_open_vswitch
> *cfg)
> +{
> +    struct bridge *br;
> +    int i;
> +
> +    for (i = 0; i < cfg->n_datapaths; i++) {
> +        const struct ovsrec_datapath *dp_cfg = cfg->value_datapaths[i];
> +        char *key = cfg->key_datapaths[i];
> +
> +        HMAP_FOR_EACH (br, node, &all_bridges) {
> +            if (!strcmp(br->type, key)) {
> +                ofproto_ct_zone_timeout_policy_reconfig(br->ofproto,
> dp_cfg,
> +                                                        idl_seqno);
> +            }
> +            break;
> +        }
> +    }
> +}
> +
> +static void
> +run_ct_zone_timeout_policy_sweep(void)
> +{
> +    struct bridge *br;
> +
> +    if (time_msec() >= ct_zone_timeout_policy_timer) {
> +        HMAP_FOR_EACH (br, node, &all_bridges) {
> +            ofproto_ct_zone_timeout_policy_sweep(br->ofproto);
> +        }
> +        ct_zone_timeout_policy_timer = time_msec() +
> +
>  CT_ZONE_TIMEOUT_POLICY_TIMER_INTERVAL;
> +    }
> +}
> +
> +static void
>  bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg)
>  {
>      struct sockaddr_in *managers;
> @@ -669,6 +708,7 @@ bridge_reconfigure(const struct ovsrec_open_vswitch
> *ovs_cfg)
>      }
>
>      reconfigure_system_stats(ovs_cfg);
> +    reconfigure_conntrack_timeout_policy(ovs_cfg);
>
>      /* Complete the configuration. */
>      sflow_bridge_number = 0;
> @@ -3082,6 +3122,7 @@ bridge_run(void)
>      run_stats_update();
>      run_status_update();
>      run_system_stats();
> +    run_ct_zone_timeout_policy_sweep();
>  }
>
>  void
> --
> 2.7.4
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
Justin Pettit Aug. 7, 2019, 4:46 a.m. UTC | #2
> On Aug 1, 2019, at 3:07 PM, Yi-Hung Wei <yihung.wei@gmail.com> wrote:
> 
> diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
> index 751535249e21..6336494e0bc8 100644
> --- a/ofproto/ofproto-dpif.c
> +++ b/ofproto/ofproto-dpif.c
> 
> +static struct ct_timeout_policy *
> +ct_timeout_policy_alloc(struct ovsrec_ct_timeout_policy *tp_cfg,
> +                        struct id_pool *tp_ids)

As we discussed this in person, bringing the OVSDB configuration into ofproto-dpif is a pretty different model than we've used before.  I think it would be better to abstract the config and carry it through the ofproto layer.  As that will require a pretty significant restructuring of this patch, I'm going to give this a mostly cursory review.

> +{
> +    struct ct_timeout_policy *tp;
> +    size_t i;

This can be declared in a tighter scope.

> +static void
> +ct_timeout_policy_destroy(struct dpif_backer *backer,
> +                          struct ct_timeout_policy *tp)
> +{
> +    id_pool_free_id(backer->timeout_policy_ids, tp->cdtp.id);
> +    ovsrcu_postpone(free, tp);
> +}

I don't think there's currently a way for this to happen, but I think it might be safer to make sure that this entry is no longer on the 'ct_tps' or 'ct_tp_kill_list'  

> +static void
> +init_ct_zone_timeout_policy(struct dpif_backer *backer)

I think this function name would be clearer if policy were plural.

> +{
> ...
> +    /* Remove existing timeout policy from datapath. */
> +    struct ct_dpif_timeout_policy cdtp;
> +    struct ct_timeout_policy *tp;

This can be declared in a tighter scope.

> +static void
> +destroy_ct_zone_timeout_policy(struct dpif_backer *backer)

I think this function name would be clearer if policy were plural.

> +{
> +    struct ct_timeout_policy *tp;
> +    struct ct_zone *zone;
> +
> +    CMAP_FOR_EACH (zone, node, &backer->ct_zones) {
> +        ct_zone_remove_and_destroy(backer, zone);
> +    }
> +
> +    CMAP_FOR_EACH (tp, node, &backer->ct_tps) {
> +        cmap_remove(&backer->ct_tps, &tp->node, uuid_hash(&tp->uuid));
> +        ct_timeout_policy_destroy(backer, tp);
> +    }
> +
> +    cmap_destroy(&backer->ct_zones);
> +    cmap_destroy(&backer->ct_tps);
> +    id_pool_destroy(backer->timeout_policy_ids);
> +}

Don't we also need to free the items on 'ct_tp_kill_list'?

> +static void
> +ct_zone_timeout_policy_sweep(const struct ofproto *ofproto)
> +{
> +    struct dpif_backer *backer = ofproto_dpif_cast(ofproto)->backer;
> +    struct ct_timeout_policy *tp;
> +    int err;
> +
> +    if (!ovs_list_is_empty(&backer->ct_tp_kill_list)) {
> +        LIST_FOR_EACH (tp, list_node, &backer->ct_tp_kill_list) {
> +            err = ct_dpif_del_timeout_policy(backer->dpif, tp->cdtp.id);
> +            if (!err) {
> +                ovs_list_remove(&tp->list_node);

Should this loop be using LIST_FOR_EACH_SAFE(), since it looks like it can be modified?

> +static void
> +ct_zone_timeout_policy_reconfig(const struct ofproto *ofproto,
> +                                const struct ovsrec_datapath *dp_cfg,
> +                                unsigned int idl_seqno)
> +{
> +    struct dpif_backer *backer = ofproto_dpif_cast(ofproto)->backer;
> +    struct ovsrec_ct_timeout_policy *tp_cfg;
> +    struct ovsrec_ct_zone *zone_cfg;
> +    struct ct_timeout_policy *tp;
> +    struct ct_zone *zone;
> +    uint16_t zone_id;
> +    bool new_zone;
> +    size_t i;

Almost all of these can be declared in a tighter scope.

> diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h
> index 7907d4bfb416..41e07f0ee23e 100644
> --- a/ofproto/ofproto-provider.h
> +++ b/ofproto/ofproto-provider.h
> 
> @@ -1872,6 +1873,13 @@ struct ofproto_class {
>     /* Flushes the connection tracking tables. If 'zone' is not NULL,
>      * only deletes connections in '*zone'. */
>     void (*ct_flush)(const struct ofproto *, const uint16_t *zone);
> +
> +    /* Reconfigures the zone-based conntrack tiemout policy based on the

s/tiemout/timeout/

> diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
> index 2976771aeaba..39b7de7d8182 100644
> --- a/vswitchd/bridge.c
> +++ b/vswitchd/bridge.c
> 
> static void
> +reconfigure_conntrack_timeout_policy(const struct ovsrec_open_vswitch *cfg)

This function uses "conntrack" in the name and others use "ct".  It would be nice to be consistent.  I would lean towards "ct", since it's shorter and used elsewhere in the code.

As I mentioned, I'll do a more thorough review of v3.

Thanks,

--Justin


-=-=-=-=-=-=-=-=-=-


diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
index 6336494e0bc8..7a7d72c89440 100644
--- a/ofproto/ofproto-dpif.c
+++ b/ofproto/ofproto-dpif.c
@@ -169,7 +169,7 @@ struct ct_timeout_policy {
 struct ct_zone {
     uint16_t id;
     unsigned int last_used_seqno;
-    struct uuid tp_uuid;            /* uuid that identifies a timeout policy in
+    struct uuid tp_uuid;            /* UUID that identifies a timeout policy in
                                      * struct dpif_backer's "ct_tps"  cmap. */
     struct cmap_node node;          /* Element in struct dpif_backer's
                                      * "ct_zones" cmap. */
@@ -215,8 +215,8 @@ static struct hmap all_ofproto_dpifs_by_uuid =
 
 static bool ofproto_use_tnl_push_pop = true;
 static void ofproto_unixctl_init(void);
-static void destroy_ct_zone_timeout_policy(struct dpif_backer *backer);
-static void init_ct_zone_timeout_policy(struct dpif_backer *backer);
+static void destroy_ct_zone_timeout_policies(struct dpif_backer *backer);
+static void init_ct_zone_timeout_policies(struct dpif_backer *backer);
 
 static inline struct ofproto_dpif *
 ofproto_dpif_cast(const struct ofproto *ofproto)
@@ -704,7 +704,7 @@ close_dpif_backer(struct dpif_backer *backer, bool del)
     }
     dpif_close(backer->dpif);
     id_pool_destroy(backer->meter_ids);
-    destroy_ct_zone_timeout_policy(backer);
+    destroy_ct_zone_timeout_policies(backer);
     free(backer);
 }
 
@@ -835,7 +835,7 @@ open_dpif_backer(const char *type, struct dpif_backer **backerp)
         backer->meter_ids = NULL;
     }
 
-    init_ct_zone_timeout_policy(backer);
+    init_ct_zone_timeout_policies(backer);
 
     /* Make a pristine snapshot of 'support' into 'boottime_support'.
      * 'boottime_support' can be checked to prevent 'support' to be changed
@@ -5160,12 +5160,9 @@ static struct ct_timeout_policy *
 ct_timeout_policy_alloc(struct ovsrec_ct_timeout_policy *tp_cfg,
                         struct id_pool *tp_ids)
 {
-    struct ct_timeout_policy *tp;
-    size_t i;
-
-    tp = xzalloc(sizeof *tp);
+    struct ct_timeout_policy *tp = xzalloc(sizeof *tp);
     tp->uuid = tp_cfg->header_.uuid;
-    for (i = 0; i < tp_cfg->n_timeouts; i++) {
+    for (size_t i = 0; i < tp_cfg->n_timeouts; i++) {
         ct_dpif_set_timeout_policy_attr_by_name(&tp->cdtp,
             tp_cfg->key_timeouts[i], tp_cfg->value_timeouts[i]);
     }
@@ -5201,27 +5198,25 @@ ct_timeout_policy_update(struct ovsrec_ct_timeout_policy *tp_cfg,
 }
 
 static void
-init_ct_zone_timeout_policy(struct dpif_backer *backer)
+init_ct_zone_timeout_policies(struct dpif_backer *backer)
 {
     cmap_init(&backer->ct_zones);
     cmap_init(&backer->ct_tps);
     backer->timeout_policy_ids = id_pool_create(0, MAX_TIMEOUT_POLICY_ID);
     ovs_list_init(&backer->ct_tp_kill_list);
 
-    /* Remove existing timeout policy from datapath. */
-    struct ct_dpif_timeout_policy cdtp;
-    struct ct_timeout_policy *tp;
+    /* Remove existing timeout policies from the datapath. */
     void *state;
-    int err;
 
-    err = ct_dpif_timeout_policy_dump_start(backer->dpif, &state);
+    int err = ct_dpif_timeout_policy_dump_start(backer->dpif, &state);
     if (err) {
         return;
     }
 
+    struct ct_dpif_timeout_policy cdtp;
     while (!(err = ct_dpif_timeout_policy_dump_next(backer->dpif, state,
                                                     &cdtp))) {
-        tp = xzalloc(sizeof *tp);
+        struct ct_timeout_policy *tp = xzalloc(sizeof *tp);
         tp->cdtp = cdtp;
         id_pool_add(backer->timeout_policy_ids, cdtp.id);
         ovs_list_insert(&backer->ct_tp_kill_list, &tp->list_node);
@@ -5231,7 +5226,7 @@ init_ct_zone_timeout_policy(struct dpif_backer *backer)
 }
 
 static void
-destroy_ct_zone_timeout_policy(struct dpif_backer *backer)
+destroy_ct_zone_timeout_policies(struct dpif_backer *backer)
 {
     struct ct_timeout_policy *tp;
     struct ct_zone *zone;
@@ -5245,6 +5240,10 @@ destroy_ct_zone_timeout_policy(struct dpif_backer *backer)
         ct_timeout_policy_destroy(backer, tp);
     }
 
+    LIST_FOR_EACH_POP (tp, list_node, &backer->ct_tp_kill_list) {
+        ct_timeout_policy_destroy(backer, tp);
+    }
+
     cmap_destroy(&backer->ct_zones);
     cmap_destroy(&backer->ct_tps);
     id_pool_destroy(backer->timeout_policy_ids);
@@ -5280,11 +5279,11 @@ static void
 ct_zone_timeout_policy_sweep(const struct ofproto *ofproto)
 {
     struct dpif_backer *backer = ofproto_dpif_cast(ofproto)->backer;
-    struct ct_timeout_policy *tp;
     int err;
 
     if (!ovs_list_is_empty(&backer->ct_tp_kill_list)) {
-        LIST_FOR_EACH (tp, list_node, &backer->ct_tp_kill_list) {
+        struct ct_timeout_policy *tp, *next;
+        LIST_FOR_EACH_SAFE (tp, next, list_node, &backer->ct_tp_kill_list) {
             err = ct_dpif_del_timeout_policy(backer->dpif, tp->cdtp.id);
             if (!err) {
                 ovs_list_remove(&tp->list_node);
@@ -5303,19 +5302,14 @@ ct_zone_timeout_policy_reconfig(const struct ofproto *ofproto,
                                 unsigned int idl_seqno)
 {
     struct dpif_backer *backer = ofproto_dpif_cast(ofproto)->backer;
-    struct ovsrec_ct_timeout_policy *tp_cfg;
-    struct ovsrec_ct_zone *zone_cfg;
-    struct ct_timeout_policy *tp;
-    struct ct_zone *zone;
-    uint16_t zone_id;
-    bool new_zone;
-    size_t i;
 
-    for (i = 0; i < dp_cfg->n_ct_zones; i++) {
+    for (size_t i = 0; i < dp_cfg->n_ct_zones; i++) {
         /* Update ct_zone config. */
-        zone_cfg = dp_cfg->value_ct_zones[i];
-        zone_id = dp_cfg->key_ct_zones[i];
-        zone = ct_zone_lookup(&backer->ct_zones, zone_id);
+        bool new_zone;
+
+        struct ovsrec_ct_zone *zone_cfg = dp_cfg->value_ct_zones[i];
+        uint16_t zone_id = dp_cfg->key_ct_zones[i];
+        struct ct_zone *zone = ct_zone_lookup(&backer->ct_zones, zone_id);
         if (!zone) {
             new_zone = true;
             zone = ct_zone_alloc(zone_id);
@@ -5325,6 +5319,9 @@ ct_zone_timeout_policy_reconfig(const struct ofproto *ofproto,
         zone->last_used_seqno = idl_seqno;
 
         /* Update timeout policy. */
+        struct ovsrec_ct_timeout_policy *tp_cfg;
+        struct ct_timeout_policy *tp;
+
         tp_cfg = zone_cfg->timeout_policy;
         tp = ct_timeout_policy_lookup(&backer->ct_tps, &tp_cfg->header_.uuid);
         if (!tp) {
diff --git a/ofproto/ofproto-dpif.h b/ofproto/ofproto-dpif.h
index 170edf0e0e70..fb7b97a3b1bc 100644
--- a/ofproto/ofproto-dpif.h
+++ b/ofproto/ofproto-dpif.h
@@ -245,14 +245,14 @@ struct dpif_backer {
     /* Meter. */
     struct id_pool *meter_ids;     /* Datapath meter allocation. */
 
-    /* Connection tracking */
-    struct id_pool  *timeout_policy_ids;    /* Datapath timeout policy id
+    /* Connection tracking. */
+    struct id_pool *timeout_policy_ids;     /* Datapath timeout policy id
                                              * allocation. */
     struct cmap ct_zones;                   /* "struct ct_zone"s indexed by
                                              * zone id. */
     struct cmap ct_tps;                     /* "struct ct_timeout_policy"s
                                              * indexed by uuid. */
-    struct ovs_list ct_tp_kill_list;        /* a list of timeout policy to be
+    struct ovs_list ct_tp_kill_list;        /* A list of timeout policy to be
                                              * deleted. */
 
     /* Version string of the datapath stored in OVSDB. */
diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h
index 41e07f0ee23e..da151cf24072 100644
--- a/ofproto/ofproto-provider.h
+++ b/ofproto/ofproto-provider.h
@@ -1874,11 +1874,11 @@ struct ofproto_class {
      * only deletes connections in '*zone'. */
     void (*ct_flush)(const struct ofproto *, const uint16_t *zone);
 
-    /* Reconfigures the zone-based conntrack tiemout policy based on the
-     * ovsdb configuration. */
+    /* Reconfigures the zone-based conntrack timeout policy based on the
+     * OVSDB configuration. */
     void (*ct_zone_timeout_policy_reconfig)(const struct ofproto *ofproto_,
             const struct ovsrec_datapath *dp_cfg, unsigned int idl_seqno);
-    /* Cleans up the to be deleted timeout policy in the pending kill list. */
+    /* Cleans up the to-be-deleted timeout policy in the pending kill list. */
     void (*ct_zone_timeout_policy_sweep)(const struct ofproto *ofproto_);
 };
 
diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
index 39b7de7d8182..919d0394d566 100644
--- a/vswitchd/bridge.c
+++ b/vswitchd/bridge.c
@@ -593,7 +593,7 @@ config_ofproto_types(const struct smap *other_config)
 }
 
 static void
-reconfigure_conntrack_timeout_policy(const struct ovsrec_open_vswitch *cfg)
+reconfigure_ct_timeout_policy(const struct ovsrec_open_vswitch *cfg)
 {
     struct bridge *br;
     int i;
@@ -708,7 +708,7 @@ bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg)
     }
 
     reconfigure_system_stats(ovs_cfg);
-    reconfigure_conntrack_timeout_policy(ovs_cfg);
+    reconfigure_ct_timeout_policy(ovs_cfg);
 
     /* Complete the configuration. */
     sflow_bridge_number = 0;
Justin Pettit Aug. 7, 2019, 4:57 a.m. UTC | #3
> On Aug 5, 2019, at 8:07 PM, Darrell Ball <dlu998@gmail.com> wrote:
> 
> On Thu, Aug 1, 2019 at 3:10 PM Yi-Hung Wei <yihung.wei@gmail.com> wrote:
> 
>> +struct ct_timeout_policy {
>> +    struct uuid uuid;
>> +    unsigned int last_used_seqno;
>> +    struct ct_dpif_timeout_policy cdtp;
>> +    struct cmap_node node;          /* Element in struct dpif_backer's
>> +                                     * "ct_tps" cmap. */
>> 
> 
> 
> This looks like a layering violation
> should be in dpif-netlink or netlink-conntrack for kernel side

Hi, Darrell.  As I mentioned in my code review, I had my own concerns about layering, but mine were from the top-down.  Yi-Hung and I didn't understand your concern here, since these seem to be structures that would be useful regardless of the implementation.  Can you explain a bit more about your layering concerns?

Thanks,

--Justin
Darrell Ball Aug. 7, 2019, 6:40 p.m. UTC | #4
On Tue, Aug 6, 2019 at 9:57 PM Justin Pettit <jpettit@ovn.org> wrote:

>
> > On Aug 5, 2019, at 8:07 PM, Darrell Ball <dlu998@gmail.com> wrote:
> >
> > On Thu, Aug 1, 2019 at 3:10 PM Yi-Hung Wei <yihung.wei@gmail.com> wrote:
> >
> >> +struct ct_timeout_policy {
> >> +    struct uuid uuid;
> >> +    unsigned int last_used_seqno;
> >> +    struct ct_dpif_timeout_policy cdtp;
> >> +    struct cmap_node node;          /* Element in struct dpif_backer's
> >> +                                     * "ct_tps" cmap. */
> >>
> >
> >
> > This looks like a layering violation
> > should be in dpif-netlink or netlink-conntrack for kernel side
>
> Hi, Darrell.  As I mentioned in my code review, I had my own concerns
> about layering, but mine were from the top-down.  Yi-Hung and I didn't
> understand your concern here, since these seem to be structures that would
> be useful regardless of the implementation.  Can you explain a bit more
> about your layering concerns?
>


I was off yesterday afternoon.

There are 3 behaviors with the patchset that are datapath specific

1/ Unwildcarding of commit flows with timeout policies
    As we discussed, the userspace conntrack does not need to do this and
would not since it is suboptimal
    since unnecessary flows are generated.
    This is because userspace conntrack would use a single shared profile
across all dl_types and ip_proto
    rather than expanding to 6 profiles as in the case of kernel across
dl_types and ip_protos.

2/ Userspace datepath/conntrack can easily manage cleanup of deleted
profiles using a refcount approach.
    For userspace conntrack, we don't need to read all the timeouts
profiles continually and to continually try to
    delete them from top down hoping to catch a window when the profile is
not referenced by a flow.

3/ In terms of timeout profile naming in userspace conntrack, we don't need
to manage a separate profile ID space for
    userspace conntrack. We can simply use the uuid directly. This
simplifies the management of profiles and always
     keeps knowledge of the profile name in sync across layers.

Thanks Darrell




>
> Thanks,
>
> --Justin
>
>
>
Darrell Ball Aug. 7, 2019, 6:51 p.m. UTC | #5
On Wed, Aug 7, 2019 at 11:40 AM Darrell Ball <dlu998@gmail.com> wrote:

>
>
> On Tue, Aug 6, 2019 at 9:57 PM Justin Pettit <jpettit@ovn.org> wrote:
>
>>
>> > On Aug 5, 2019, at 8:07 PM, Darrell Ball <dlu998@gmail.com> wrote:
>> >
>> > On Thu, Aug 1, 2019 at 3:10 PM Yi-Hung Wei <yihung.wei@gmail.com>
>> wrote:
>> >
>> >> +struct ct_timeout_policy {
>> >> +    struct uuid uuid;
>> >> +    unsigned int last_used_seqno;
>> >> +    struct ct_dpif_timeout_policy cdtp;
>> >> +    struct cmap_node node;          /* Element in struct dpif_backer's
>> >> +                                     * "ct_tps" cmap. */
>> >>
>> >
>> >
>> > This looks like a layering violation
>> > should be in dpif-netlink or netlink-conntrack for kernel side
>>
>> Hi, Darrell.  As I mentioned in my code review, I had my own concerns
>> about layering, but mine were from the top-down.  Yi-Hung and I didn't
>> understand your concern here, since these seem to be structures that would
>> be useful regardless of the implementation.  Can you explain a bit more
>> about your layering concerns?
>>
>
>
> I was off yesterday afternoon.
>
> There are 3 behaviors with the patchset that are datapath specific
>
> 1/ Unwildcarding of commit flows with timeout policies
>     As we discussed, the userspace conntrack does not need to do this and
> would not since it is suboptimal
>     since unnecessary flows are generated.
>     This is because userspace conntrack would use a single shared profile
> across all dl_types and ip_proto
>     rather than expanding to 6 profiles as in the case of kernel across
> dl_types and ip_protos.
>
> 2/ Userspace datepath/conntrack can easily manage cleanup of deleted
> profiles using a refcount approach.
>     For userspace conntrack, we don't need to read all the timeouts
> profiles continually and to continually try to
>     delete them from top down hoping to catch a window when the profile is
> not referenced by a flow.
>
> 3/ In terms of timeout profile naming in userspace conntrack, we don't
> need to manage a separate profile ID space for
>     userspace conntrack. We can simply use the uuid directly. This
> simplifies the management of profiles and always
>      keeps knowledge of the profile name in sync across layers.
>

Hence, the comments for this patch center around where the implementation
code is.
I think the code should live in datapath type specific code/files.

Of course, wrappers are needed at higher layers to call the datapath
specific implementations.


>
> Thanks Darrell
>
>
>
>
>>
>> Thanks,
>>
>> --Justin
>>
>>
>>
Darrell Ball Aug. 7, 2019, 9:38 p.m. UTC | #6
On Wed, Aug 7, 2019 at 11:51 AM Darrell Ball <dlu998@gmail.com> wrote:

>
>
> On Wed, Aug 7, 2019 at 11:40 AM Darrell Ball <dlu998@gmail.com> wrote:
>
>>
>>
>> On Tue, Aug 6, 2019 at 9:57 PM Justin Pettit <jpettit@ovn.org> wrote:
>>
>>>
>>> > On Aug 5, 2019, at 8:07 PM, Darrell Ball <dlu998@gmail.com> wrote:
>>> >
>>> > On Thu, Aug 1, 2019 at 3:10 PM Yi-Hung Wei <yihung.wei@gmail.com>
>>> wrote:
>>> >
>>> >> +struct ct_timeout_policy {
>>> >> +    struct uuid uuid;
>>> >> +    unsigned int last_used_seqno;
>>> >> +    struct ct_dpif_timeout_policy cdtp;
>>> >> +    struct cmap_node node;          /* Element in struct
>>> dpif_backer's
>>> >> +                                     * "ct_tps" cmap. */
>>> >>
>>> >
>>> >
>>> > This looks like a layering violation
>>> > should be in dpif-netlink or netlink-conntrack for kernel side
>>>
>>> Hi, Darrell.  As I mentioned in my code review, I had my own concerns
>>> about layering, but mine were from the top-down.  Yi-Hung and I didn't
>>> understand your concern here, since these seem to be structures that would
>>> be useful regardless of the implementation.  Can you explain a bit more
>>> about your layering concerns?
>>>
>>
>>
>> I was off yesterday afternoon.
>>
>> There are 3 behaviors with the patchset that are datapath specific
>>
>> 1/ Unwildcarding of commit flows with timeout policies
>>     As we discussed, the userspace conntrack does not need to do this and
>> would not since it is suboptimal
>>     since unnecessary flows are generated.
>>     This is because userspace conntrack would use a single shared profile
>> across all dl_types and ip_proto
>>     rather than expanding to 6 profiles as in the case of kernel across
>> dl_types and ip_protos.
>>
>> 2/ Userspace datepath/conntrack can easily manage cleanup of deleted
>> profiles using a refcount approach.
>>     For userspace conntrack, we don't need to read all the timeouts
>> profiles continually and to continually try to
>>     delete them from top down hoping to catch a window when the profile
>> is not referenced by a flow.
>>
>> 3/ In terms of timeout profile naming in userspace conntrack, we don't
>> need to manage a separate profile ID space for
>>     userspace conntrack. We can simply use the uuid directly. This
>> simplifies the management of profiles and always
>>      keeps knowledge of the profile name in sync across layers.
>>
>
I think '3' is not that important for the userspace datapath, since when
vswitchd restarts we loose everything in the datapath
and will need to reallocate u32 profile IDs and re-associate them, anyways.
So we never loose the association per se.
Also, we would need to increase the size of a datastructure field to use
the uuid directly.

Also, '1' and '2' can be implemented later if it delays things too much. I
can submit a followup patch(es) if needed.


>
> Hence, the comments for this patch center around where the implementation
> code is.
> I think the code should live in datapath type specific code/files.
>
> Of course, wrappers are needed at higher layers to call the datapath
> specific implementations.
>
>
>>
>> Thanks Darrell
>>
>>
>>
>>
>>>
>>> Thanks,
>>>
>>> --Justin
>>>
>>>
>>>
Justin Pettit Aug. 9, 2019, 8:23 p.m. UTC | #7
> On Aug 7, 2019, at 11:40 AM, Darrell Ball <dlu998@gmail.com> wrote:
> 
> There are 3 behaviors with the patchset that are datapath specific
> 
> 1/ Unwildcarding of commit flows with timeout policies
>     As we discussed, the userspace conntrack does not need to do this and would not since it is suboptimal
>     since unnecessary flows are generated.
>     This is because userspace conntrack would use a single shared profile across all dl_types and ip_proto
>     rather than expanding to 6 profiles as in the case of kernel across dl_types and ip_protos.

That makes sense.  Based on this feedback, Yi-Hung and I came up with a design that I think will address this in v3.

> 2/ Userspace datepath/conntrack can easily manage cleanup of deleted profiles using a refcount approach.
>     For userspace conntrack, we don't need to read all the timeouts profiles continually and to continually try to
>     delete them from top down hoping to catch a window when the profile is not referenced by a flow.

Yes, I think we don't need to introduce ct_zone_timeout_policy_sweep(), and it can just be implemented in the existing dpif_netlink_run().  That should be in v3.

> 3/ In terms of timeout profile naming in userspace conntrack, we don't need to manage a separate profile ID space for
>     userspace conntrack. We can simply use the uuid directly. This simplifies the management of profiles and always
>      keeps knowledge of the profile name in sync across layers.

I'm hesitant to use the OVSDB row UUIDs in the dpif layer.  v3 is going to restructure the handling of timeout policies pretty significantly, so let's discuss the particulars once that's out.

Thanks,

--Justin
Darrell Ball Aug. 11, 2019, 6:34 p.m. UTC | #8
On Fri, Aug 9, 2019 at 1:23 PM Justin Pettit <jpettit@ovn.org> wrote:

>
> > On Aug 7, 2019, at 11:40 AM, Darrell Ball <dlu998@gmail.com> wrote:
> >
> > There are 3 behaviors with the patchset that are datapath specific
> >
> > 1/ Unwildcarding of commit flows with timeout policies
> >     As we discussed, the userspace conntrack does not need to do this
> and would not since it is suboptimal
> >     since unnecessary flows are generated.
> >     This is because userspace conntrack would use a single shared
> profile across all dl_types and ip_proto
> >     rather than expanding to 6 profiles as in the case of kernel across
> dl_types and ip_protos.
>
> That makes sense.  Based on this feedback, Yi-Hung and I came up with a
> design that I think will address this in v3.
>

I spotted the other e-mail.


>
> > 2/ Userspace datepath/conntrack can easily manage cleanup of deleted
> profiles using a refcount approach.
> >     For userspace conntrack, we don't need to read all the timeouts
> profiles continually and to continually try to
> >     delete them from top down hoping to catch a window when the profile
> is not referenced by a flow.
>
> Yes, I think we don't need to introduce ct_zone_timeout_policy_sweep(),
> and it can just be implemented in the existing dpif_netlink_run().  That
> should be in v3.
>

yep


>
> > 3/ In terms of timeout profile naming in userspace conntrack, we don't
> need to manage a separate profile ID space for
> >     userspace conntrack. We can simply use the uuid directly. This
> simplifies the management of profiles and always
> >      keeps knowledge of the profile name in sync across layers.
>
> I'm hesitant to use the OVSDB row UUIDs in the dpif layer.  v3 is going to
> restructure the handling of timeout policies pretty significantly, so let's
> discuss the particulars once that's out.
>

I am not overly concerned about '3' in general for the userspace datapath
since vswitchd restart will wipe out the dynamic u32 profile IDs anyways
and they will need to be re-associated, so they should be tightly coupled
to the database uuids.


>
> Thanks,
>
> --Justin
>
>
>

Patch
diff mbox series

diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
index 751535249e21..6336494e0bc8 100644
--- a/ofproto/ofproto-dpif.c
+++ b/ofproto/ofproto-dpif.c
@@ -156,6 +156,25 @@  struct ofport_dpif {
     size_t n_qdscp;
 };
 
+struct ct_timeout_policy {
+    struct uuid uuid;
+    unsigned int last_used_seqno;
+    struct ct_dpif_timeout_policy cdtp;
+    struct cmap_node node;          /* Element in struct dpif_backer's
+                                     * "ct_tps" cmap. */
+    struct ovs_list list_node;      /* Element in struct dpif_backer's
+                                     * "ct_tp_kill_list" list. */
+};
+
+struct ct_zone {
+    uint16_t id;
+    unsigned int last_used_seqno;
+    struct uuid tp_uuid;            /* uuid that identifies a timeout policy in
+                                     * struct dpif_backer's "ct_tps"  cmap. */
+    struct cmap_node node;          /* Element in struct dpif_backer's
+                                     * "ct_zones" cmap. */
+};
+
 static odp_port_t ofp_port_to_odp_port(const struct ofproto_dpif *,
                                        ofp_port_t);
 
@@ -196,6 +215,8 @@  static struct hmap all_ofproto_dpifs_by_uuid =
 
 static bool ofproto_use_tnl_push_pop = true;
 static void ofproto_unixctl_init(void);
+static void destroy_ct_zone_timeout_policy(struct dpif_backer *backer);
+static void init_ct_zone_timeout_policy(struct dpif_backer *backer);
 
 static inline struct ofproto_dpif *
 ofproto_dpif_cast(const struct ofproto *ofproto)
@@ -683,6 +704,7 @@  close_dpif_backer(struct dpif_backer *backer, bool del)
     }
     dpif_close(backer->dpif);
     id_pool_destroy(backer->meter_ids);
+    destroy_ct_zone_timeout_policy(backer);
     free(backer);
 }
 
@@ -694,6 +716,8 @@  struct odp_garbage {
 
 static void check_support(struct dpif_backer *backer);
 
+#define MAX_TIMEOUT_POLICY_ID UINT32_MAX
+
 static int
 open_dpif_backer(const char *type, struct dpif_backer **backerp)
 {
@@ -811,6 +835,8 @@  open_dpif_backer(const char *type, struct dpif_backer **backerp)
         backer->meter_ids = NULL;
     }
 
+    init_ct_zone_timeout_policy(backer);
+
     /* Make a pristine snapshot of 'support' into 'boottime_support'.
      * 'boottime_support' can be checked to prevent 'support' to be changed
      * beyond the datapath capabilities. In case 'support' is changed by
@@ -5086,6 +5112,244 @@  ct_flush(const struct ofproto *ofproto_, const uint16_t *zone)
     ct_dpif_flush(ofproto->backer->dpif, zone, NULL);
 }
 
+static struct ct_zone *
+ct_zone_lookup(struct cmap *ct_zones, uint16_t zone_id)
+{
+    struct ct_zone *zone;
+
+    CMAP_FOR_EACH_WITH_HASH (zone, node, hash_int(zone_id, 0), ct_zones) {
+        if (zone->id == zone_id) {
+            return zone;
+        }
+    }
+    return NULL;
+}
+
+static struct ct_zone *
+ct_zone_alloc(uint16_t zone_id)
+{
+    struct ct_zone *zone;
+
+    zone = xzalloc(sizeof *zone);
+    zone->id = zone_id;
+
+    return zone;
+}
+
+static void
+ct_zone_remove_and_destroy(struct dpif_backer *backer, struct ct_zone *zone)
+{
+    cmap_remove(&backer->ct_zones, &zone->node, hash_int(zone->id, 0));
+    ovsrcu_postpone(free, zone);
+}
+
+static struct ct_timeout_policy *
+ct_timeout_policy_lookup(struct cmap *ct_tps, struct uuid *uuid)
+{
+    struct ct_timeout_policy *tp;
+
+    CMAP_FOR_EACH_WITH_HASH (tp, node, uuid_hash(uuid), ct_tps) {
+        if (uuid_equals(&tp->uuid, uuid)) {
+            return tp;
+        }
+    }
+    return NULL;
+}
+
+static struct ct_timeout_policy *
+ct_timeout_policy_alloc(struct ovsrec_ct_timeout_policy *tp_cfg,
+                        struct id_pool *tp_ids)
+{
+    struct ct_timeout_policy *tp;
+    size_t i;
+
+    tp = xzalloc(sizeof *tp);
+    tp->uuid = tp_cfg->header_.uuid;
+    for (i = 0; i < tp_cfg->n_timeouts; i++) {
+        ct_dpif_set_timeout_policy_attr_by_name(&tp->cdtp,
+            tp_cfg->key_timeouts[i], tp_cfg->value_timeouts[i]);
+    }
+    if (!id_pool_alloc_id(tp_ids, &tp->cdtp.id)) {
+        VLOG_WARN_RL(&rl, "failed to allocate timeout policy id.");
+        free(tp);
+        return NULL;
+    }
+
+    return tp;
+}
+
+static void
+ct_timeout_policy_destroy(struct dpif_backer *backer,
+                          struct ct_timeout_policy *tp)
+{
+    id_pool_free_id(backer->timeout_policy_ids, tp->cdtp.id);
+    ovsrcu_postpone(free, tp);
+}
+
+static bool
+ct_timeout_policy_update(struct ovsrec_ct_timeout_policy *tp_cfg,
+                         struct ct_timeout_policy *tp)
+{
+    size_t i;
+    bool changed = false;
+
+    for (i = 0; i < tp_cfg->n_timeouts; i++) {
+        changed |= ct_dpif_set_timeout_policy_attr_by_name(&tp->cdtp,
+                        tp_cfg->key_timeouts[i], tp_cfg->value_timeouts[i]);
+    }
+    return changed;
+}
+
+static void
+init_ct_zone_timeout_policy(struct dpif_backer *backer)
+{
+    cmap_init(&backer->ct_zones);
+    cmap_init(&backer->ct_tps);
+    backer->timeout_policy_ids = id_pool_create(0, MAX_TIMEOUT_POLICY_ID);
+    ovs_list_init(&backer->ct_tp_kill_list);
+
+    /* Remove existing timeout policy from datapath. */
+    struct ct_dpif_timeout_policy cdtp;
+    struct ct_timeout_policy *tp;
+    void *state;
+    int err;
+
+    err = ct_dpif_timeout_policy_dump_start(backer->dpif, &state);
+    if (err) {
+        return;
+    }
+
+    while (!(err = ct_dpif_timeout_policy_dump_next(backer->dpif, state,
+                                                    &cdtp))) {
+        tp = xzalloc(sizeof *tp);
+        tp->cdtp = cdtp;
+        id_pool_add(backer->timeout_policy_ids, cdtp.id);
+        ovs_list_insert(&backer->ct_tp_kill_list, &tp->list_node);
+    }
+
+    ct_dpif_timeout_policy_dump_done(backer->dpif, state);
+}
+
+static void
+destroy_ct_zone_timeout_policy(struct dpif_backer *backer)
+{
+    struct ct_timeout_policy *tp;
+    struct ct_zone *zone;
+
+    CMAP_FOR_EACH (zone, node, &backer->ct_zones) {
+        ct_zone_remove_and_destroy(backer, zone);
+    }
+
+    CMAP_FOR_EACH (tp, node, &backer->ct_tps) {
+        cmap_remove(&backer->ct_tps, &tp->node, uuid_hash(&tp->uuid));
+        ct_timeout_policy_destroy(backer, tp);
+    }
+
+    cmap_destroy(&backer->ct_zones);
+    cmap_destroy(&backer->ct_tps);
+    id_pool_destroy(backer->timeout_policy_ids);
+}
+
+static void
+ct_zone_timeout_policy_revalidate(struct dpif_backer *backer,
+                                  unsigned int idl_seqno)
+{
+    struct ct_timeout_policy *tp;
+    struct ct_zone *zone;
+
+    CMAP_FOR_EACH (zone, node, &backer->ct_zones) {
+        if (zone->last_used_seqno != idl_seqno) {
+            ct_zone_remove_and_destroy(backer, zone);
+        }
+    }
+
+    CMAP_FOR_EACH (tp, node, &backer->ct_tps) {
+        if (tp->last_used_seqno != idl_seqno) {
+            /* Even timeout policy is not referenced by any ct_zone in the
+             * database, vswitchd may not be able to delete it right away
+             * since the datapath flow may still reference to the timeout
+             * policy. Move the timeout policy to ct_tp_kill_list and try to
+             * delete it when possible. */
+            cmap_remove(&backer->ct_tps, &tp->node, uuid_hash(&tp->uuid));
+            ovs_list_push_back(&backer->ct_tp_kill_list, &tp->list_node);
+        }
+    }
+}
+
+static void
+ct_zone_timeout_policy_sweep(const struct ofproto *ofproto)
+{
+    struct dpif_backer *backer = ofproto_dpif_cast(ofproto)->backer;
+    struct ct_timeout_policy *tp;
+    int err;
+
+    if (!ovs_list_is_empty(&backer->ct_tp_kill_list)) {
+        LIST_FOR_EACH (tp, list_node, &backer->ct_tp_kill_list) {
+            err = ct_dpif_del_timeout_policy(backer->dpif, tp->cdtp.id);
+            if (!err) {
+                ovs_list_remove(&tp->list_node);
+                ct_timeout_policy_destroy(backer, tp);
+            } else {
+                VLOG_INFO_RL(&rl, "Failed to delete timeout policy id = "
+                        "%"PRIu32" %s", tp->cdtp.id, ovs_strerror(err));
+            }
+        }
+    }
+}
+
+static void
+ct_zone_timeout_policy_reconfig(const struct ofproto *ofproto,
+                                const struct ovsrec_datapath *dp_cfg,
+                                unsigned int idl_seqno)
+{
+    struct dpif_backer *backer = ofproto_dpif_cast(ofproto)->backer;
+    struct ovsrec_ct_timeout_policy *tp_cfg;
+    struct ovsrec_ct_zone *zone_cfg;
+    struct ct_timeout_policy *tp;
+    struct ct_zone *zone;
+    uint16_t zone_id;
+    bool new_zone;
+    size_t i;
+
+    for (i = 0; i < dp_cfg->n_ct_zones; i++) {
+        /* Update ct_zone config. */
+        zone_cfg = dp_cfg->value_ct_zones[i];
+        zone_id = dp_cfg->key_ct_zones[i];
+        zone = ct_zone_lookup(&backer->ct_zones, zone_id);
+        if (!zone) {
+            new_zone = true;
+            zone = ct_zone_alloc(zone_id);
+        } else {
+            new_zone = false;
+        }
+        zone->last_used_seqno = idl_seqno;
+
+        /* Update timeout policy. */
+        tp_cfg = zone_cfg->timeout_policy;
+        tp = ct_timeout_policy_lookup(&backer->ct_tps, &tp_cfg->header_.uuid);
+        if (!tp) {
+            tp = ct_timeout_policy_alloc(tp_cfg, backer->timeout_policy_ids);
+            if (tp) {
+                cmap_insert(&backer->ct_tps, &tp->node, uuid_hash(&tp->uuid));
+                ct_dpif_set_timeout_policy(backer->dpif, &tp->cdtp);
+            }
+        } else {
+            if (ct_timeout_policy_update(tp_cfg, tp)) {
+                ct_dpif_set_timeout_policy(backer->dpif, &tp->cdtp);
+            }
+        }
+        tp->last_used_seqno = idl_seqno;
+
+        /* Link zone with new timeout policy. */
+        zone->tp_uuid = tp_cfg->header_.uuid;
+        if (new_zone) {
+            cmap_insert(&backer->ct_zones, &zone->node, hash_int(zone_id, 0));
+        }
+    }
+    ct_zone_timeout_policy_revalidate(backer, idl_seqno);
+    ct_zone_timeout_policy_sweep(ofproto);
+}
+
 static bool
 set_frag_handling(struct ofproto *ofproto_,
                   enum ofputil_frag_handling frag_handling)
@@ -6189,4 +6453,6 @@  const struct ofproto_class ofproto_dpif_class = {
     get_datapath_version,       /* get_datapath_version */
     type_set_config,
     ct_flush,                   /* ct_flush */
+    ct_zone_timeout_policy_reconfig,
+    ct_zone_timeout_policy_sweep,
 };
diff --git a/ofproto/ofproto-dpif.h b/ofproto/ofproto-dpif.h
index cd5321eb942c..170edf0e0e70 100644
--- a/ofproto/ofproto-dpif.h
+++ b/ofproto/ofproto-dpif.h
@@ -245,6 +245,16 @@  struct dpif_backer {
     /* Meter. */
     struct id_pool *meter_ids;     /* Datapath meter allocation. */
 
+    /* Connection tracking */
+    struct id_pool  *timeout_policy_ids;    /* Datapath timeout policy id
+                                             * allocation. */
+    struct cmap ct_zones;                   /* "struct ct_zone"s indexed by
+                                             * zone id. */
+    struct cmap ct_tps;                     /* "struct ct_timeout_policy"s
+                                             * indexed by uuid. */
+    struct ovs_list ct_tp_kill_list;        /* a list of timeout policy to be
+                                             * deleted. */
+
     /* Version string of the datapath stored in OVSDB. */
     char *dp_version_string;
 
diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h
index 7907d4bfb416..41e07f0ee23e 100644
--- a/ofproto/ofproto-provider.h
+++ b/ofproto/ofproto-provider.h
@@ -58,6 +58,7 @@ 
 #include "tun-metadata.h"
 #include "versions.h"
 #include "vl-mff-map.h"
+#include "vswitch-idl.h"
 
 struct match;
 struct ofputil_flow_mod;
@@ -1872,6 +1873,13 @@  struct ofproto_class {
     /* Flushes the connection tracking tables. If 'zone' is not NULL,
      * only deletes connections in '*zone'. */
     void (*ct_flush)(const struct ofproto *, const uint16_t *zone);
+
+    /* Reconfigures the zone-based conntrack tiemout policy based on the
+     * ovsdb configuration. */
+    void (*ct_zone_timeout_policy_reconfig)(const struct ofproto *ofproto_,
+            const struct ovsrec_datapath *dp_cfg, unsigned int idl_seqno);
+    /* Cleans up the to be deleted timeout policy in the pending kill list. */
+    void (*ct_zone_timeout_policy_sweep)(const struct ofproto *ofproto_);
 };
 
 extern const struct ofproto_class ofproto_dpif_class;
diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
index 1d6fc00696f8..373b8a4eba0c 100644
--- a/ofproto/ofproto.c
+++ b/ofproto/ofproto.c
@@ -935,6 +935,26 @@  ofproto_get_flow_restore_wait(void)
     return flow_restore_wait;
 }
 
+/* Connection tracking configuration. */
+void
+ofproto_ct_zone_timeout_policy_reconfig(const struct ofproto *ofproto,
+                                        const struct ovsrec_datapath *dp_cfg,
+                                        unsigned int idl_seqno)
+{
+    if (ofproto->ofproto_class->ct_zone_timeout_policy_reconfig) {
+        ofproto->ofproto_class->ct_zone_timeout_policy_reconfig(
+            ofproto, dp_cfg, idl_seqno);
+    }
+}
+
+void
+ofproto_ct_zone_timeout_policy_sweep(const struct ofproto *ofproto)
+{
+    if (ofproto->ofproto_class->ct_zone_timeout_policy_sweep) {
+        ofproto->ofproto_class->ct_zone_timeout_policy_sweep(ofproto);
+    }
+}
+
 
 /* Spanning Tree Protocol (STP) configuration. */
 
diff --git a/ofproto/ofproto.h b/ofproto/ofproto.h
index 6e4afffa17e0..2ae42374be36 100644
--- a/ofproto/ofproto.h
+++ b/ofproto/ofproto.h
@@ -52,6 +52,7 @@  struct ovs_list;
 struct lldp_status;
 struct aa_settings;
 struct aa_mapping_settings;
+struct ovsrec_datapath;
 
 /* Needed for the lock annotations. */
 extern struct ovs_mutex ofproto_mutex;
@@ -362,6 +363,9 @@  int ofproto_get_stp_status(struct ofproto *, struct ofproto_stp_status *);
 int ofproto_set_rstp(struct ofproto *, const struct ofproto_rstp_settings *);
 int ofproto_get_rstp_status(struct ofproto *, struct ofproto_rstp_status *);
 void ofproto_set_vlan_limit(int vlan_limit);
+void ofproto_ct_zone_timeout_policy_reconfig(const struct ofproto *ofproto,
+    const struct ovsrec_datapath *dp_cfg, unsigned int idl_seqno);
+void ofproto_ct_zone_timeout_policy_sweep(const struct ofproto *ofproto);
 
 /* Configuration of ports. */
 void ofproto_port_unregister(struct ofproto *, ofp_port_t ofp_port);
diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
index 2976771aeaba..39b7de7d8182 100644
--- a/vswitchd/bridge.c
+++ b/vswitchd/bridge.c
@@ -220,6 +220,11 @@  static long long int stats_timer = LLONG_MIN;
 #define AA_REFRESH_INTERVAL (1000) /* In milliseconds. */
 static long long int aa_refresh_timer = LLONG_MIN;
 
+/* Each time this timer expires, the bridge tries to delete the timeout
+ * policies in the kill list. */
+#define CT_ZONE_TIMEOUT_POLICY_TIMER_INTERVAL (5000) /* In milliseconds. */
+static long long int ct_zone_timeout_policy_timer = LLONG_MIN;
+
 /* Whenever system interfaces are added, removed or change state, the bridge
  * will be reconfigured.
  */
@@ -588,6 +593,40 @@  config_ofproto_types(const struct smap *other_config)
 }
 
 static void
+reconfigure_conntrack_timeout_policy(const struct ovsrec_open_vswitch *cfg)
+{
+    struct bridge *br;
+    int i;
+
+    for (i = 0; i < cfg->n_datapaths; i++) {
+        const struct ovsrec_datapath *dp_cfg = cfg->value_datapaths[i];
+        char *key = cfg->key_datapaths[i];
+
+        HMAP_FOR_EACH (br, node, &all_bridges) {
+            if (!strcmp(br->type, key)) {
+                ofproto_ct_zone_timeout_policy_reconfig(br->ofproto, dp_cfg,
+                                                        idl_seqno);
+            }
+            break;
+        }
+    }
+}
+
+static void
+run_ct_zone_timeout_policy_sweep(void)
+{
+    struct bridge *br;
+
+    if (time_msec() >= ct_zone_timeout_policy_timer) {
+        HMAP_FOR_EACH (br, node, &all_bridges) {
+            ofproto_ct_zone_timeout_policy_sweep(br->ofproto);
+        }
+        ct_zone_timeout_policy_timer = time_msec() +
+                                       CT_ZONE_TIMEOUT_POLICY_TIMER_INTERVAL;
+    }
+}
+
+static void
 bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg)
 {
     struct sockaddr_in *managers;
@@ -669,6 +708,7 @@  bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg)
     }
 
     reconfigure_system_stats(ovs_cfg);
+    reconfigure_conntrack_timeout_policy(ovs_cfg);
 
     /* Complete the configuration. */
     sflow_bridge_number = 0;
@@ -3082,6 +3122,7 @@  bridge_run(void)
     run_stats_update();
     run_status_update();
     run_system_stats();
+    run_ct_zone_timeout_policy_sweep();
 }
 
 void