diff mbox

[ovs-dev,RFC] ovn: Support sample action in logical datapath

Message ID 33afdc11-59b3-d5c1-6db2-a8337929e859@gmail.com
State RFC
Headers show

Commit Message

Valentine Sinitsyn Oct. 14, 2016, 11:35 a.m. UTC
Hi all,

This is a quick attempt to implement sample action at logical port 
level.The goal is to export IPFIX flows for logical ports, yet it is 
easy to extend this approach to logical switches as well.

Nothing is done to provision OVS instances with required 
Flow_Sample_Collector_Set and IPFIX entries at this point.

Does the approach I take looks sensible? If so, I can add tests and 
re-send this patch for in-depth review.

Many thanks.

Signed-off-by: Valentine Sinitsyn <valentine.sinitsyn@gmail.com>

---
  include/ovn/actions.h     |  12 +++++-
  ovn/lib/actions.c         | 102 
++++++++++++++++++++++++++++++++++++++++++++++
  ovn/northd/ovn-northd.c   |  77 ++++++++++++++++++++++++++++++++--
  ovn/ovn-nb.ovsschema      |  32 +++++++++++++--
  ovn/ovn-nb.xml            |  36 ++++++++++++++++
  ovn/utilities/ovn-nbctl.c |   5 +++
  ovn/utilities/ovn-trace.c |   4 ++
  7 files changed, 260 insertions(+), 8 deletions(-)

Comments

Valentine Sinitsyn Oct. 14, 2016, 11:42 a.m. UTC | #1
Looks like Thunderbird messed up my patch when sending it.
Sorry for that - although I hope it shouldn't be a problem for a 
high-level review.

Best,
Valentine

On 14.10.2016 16:35, Valentine Sinitsyn wrote:
> Hi all,
>
> This is a quick attempt to implement sample action at logical port
> level.The goal is to export IPFIX flows for logical ports, yet it is
> easy to extend this approach to logical switches as well.
>
> Nothing is done to provision OVS instances with required
> Flow_Sample_Collector_Set and IPFIX entries at this point.
>
> Does the approach I take looks sensible? If so, I can add tests and
> re-send this patch for in-depth review.
>
> Many thanks.
>
> Signed-off-by: Valentine Sinitsyn <valentine.sinitsyn@gmail.com>
>
> ---
>  include/ovn/actions.h     |  12 +++++-
>  ovn/lib/actions.c         | 102
> ++++++++++++++++++++++++++++++++++++++++++++++
>  ovn/northd/ovn-northd.c   |  77 ++++++++++++++++++++++++++++++++--
>  ovn/ovn-nb.ovsschema      |  32 +++++++++++++--
>  ovn/ovn-nb.xml            |  36 ++++++++++++++++
>  ovn/utilities/ovn-nbctl.c |   5 +++
>  ovn/utilities/ovn-trace.c |   4 ++
>  7 files changed, 260 insertions(+), 8 deletions(-)
>
> diff --git a/include/ovn/actions.h b/include/ovn/actions.h
> index 2905937..7dc4bb9 100644
> --- a/include/ovn/actions.h
> +++ b/include/ovn/actions.h
> @@ -66,7 +66,8 @@ struct simap;
>      OVNACT(GET_ND,        ovnact_get_mac_bind)      \
>      OVNACT(PUT_ND,        ovnact_put_mac_bind)      \
>      OVNACT(PUT_DHCPV4_OPTS, ovnact_put_dhcp_opts)   \
> -    OVNACT(PUT_DHCPV6_OPTS, ovnact_put_dhcp_opts)
> +    OVNACT(PUT_DHCPV6_OPTS, ovnact_put_dhcp_opts)   \
> +    OVNACT(SAMPLE,      ovnact_sample)
>   /* enum ovnact_type, with a member OVNACT_<ENUM> for each action. */
>  enum OVS_PACKED_ENUM ovnact_type {
> @@ -219,6 +220,15 @@ struct ovnact_put_dhcp_opts {
>      size_t n_options;
>  };
>  +/* OVNACT_SAMPLE */
> +struct ovnact_sample {
> +    struct ovnact ovnact;
> +    uint32_t collector_set_id;
> +    uint32_t probability;
> +    uint32_t obs_domain_id;
> +    uint32_t obs_point_id;
> +};
> +
>  /* Internal use by the helpers below. */
>  void ovnact_init(struct ovnact *, enum ovnact_type, size_t len);
>  void *ovnact_put(struct ofpbuf *, enum ovnact_type, size_t len);
> diff --git a/ovn/lib/actions.c b/ovn/lib/actions.c
> index 59131dd..e1debb3 100644
> --- a/ovn/lib/actions.c
> +++ b/ovn/lib/actions.c
> @@ -1614,6 +1614,106 @@ free_PUT_DHCPV6_OPTS(struct ovnact_put_dhcp_opts
> *pdo)
>  {
>      free_put_dhcp_opts(pdo);
>  }
> +
> +static bool
> +parse_sample_arg_get_int_helper(struct lexer *lexer, uint32_t *out)
> +{
> +    if (!lexer_force_match(lexer, LEX_T_EQUALS)) {
> +    return false;
> +    }
> +    if (lexer->token.type == LEX_T_INTEGER) {
> +    *out = ntohl(lexer->token.value.be32_int);
> +    } else {
> +    lexer_syntax_error(lexer, "expecting integer");
> +    return false;
> +    }
> +    lexer_get(lexer);
> +    return true;
> +}
> +
> +static void
> +parse_sample_args(struct action_context *ctx,
> +                  struct ovnact_sample *cc)
> +{
> +    bool ok;
> +
> +    if (lexer_match_id(ctx->lexer, "collector_set_id")) {
> +    ok = parse_sample_arg_get_int_helper(ctx->lexer,
> +                                         &cc->collector_set_id);
> +    if (!ok)
> +        return;
> +    } else if (lexer_match_id(ctx->lexer, "probability")) {
> +    ok = parse_sample_arg_get_int_helper(ctx->lexer,
> +                                         &cc->probability);
> +    if (!ok)
> +        return;
> +    } else if (lexer_match_id(ctx->lexer, "obs_domain_id")) {
> +    ok = parse_sample_arg_get_int_helper(ctx->lexer,
> +                                         &cc->obs_domain_id);
> +    if (!ok)
> +        return;
> +    } else if (lexer_match_id(ctx->lexer, "obs_point_id")) {
> +    ok = parse_sample_arg_get_int_helper(ctx->lexer,
> +                                         &cc->obs_point_id);
> +    if (!ok)
> +        return;
> +    } else {
> +        lexer_syntax_error(ctx->lexer, NULL);
> +    }
> +}
> +
> +static void
> +parse_SAMPLE(struct action_context *ctx)
> +{
> +    struct ovnact_sample *sample = ovnact_put_SAMPLE(ctx->ovnacts);
> +    if (lexer_match(ctx->lexer, LEX_T_LPAREN)) {
> +        while (!lexer_match(ctx->lexer, LEX_T_RPAREN)) {
> +            parse_sample_args(ctx, sample);
> +            if (ctx->lexer->error) {
> +                return;
> +            }
> +            lexer_match(ctx->lexer, LEX_T_COMMA);
> +        }
> +    }
> +
> +    if (!sample->probability)
> +    lexer_error(ctx->lexer, "sample probability can't be zero");
> +}
> +
> +static void
> +encode_SAMPLE(const struct ovnact_sample *sample,
> +              const struct ovnact_encode_params *ep OVS_UNUSED,
> +              struct ofpbuf *ofpacts)
> +{
> +    struct ofpact_sample *action = ofpact_put_SAMPLE(ofpacts);
> +    action->collector_set_id = sample->collector_set_id;
> +    /* sampling is 16 bit wide in the northbound database */
> +    action->probability = (uint16_t)sample->probability;
> +    action->obs_domain_id = sample->obs_domain_id;
> +    action->obs_point_id = sample->obs_point_id;
> +    action->sampling_port = OFPP_NONE;
> +}
> +
> +static void
> +format_SAMPLE(const struct ovnact_sample *sample, struct ds *s)
> +{
> +    ds_put_format(s,
> +              "sample(collector_set_id=%" PRIu32 \
> +          ", probability=%" PRIu32 \
> +          ", obs_domain_id=%" PRIu32 \
> +          ", obs_point_id=%" PRIu32 ")",
> +          sample->collector_set_id,
> +          sample->probability,
> +          sample->obs_domain_id,
> +          sample->obs_point_id);
> +}
> +
> +static void
> +free_SAMPLE(struct ovnact_sample *sample OVS_UNUSED)
> +{
> +    /* Nothing to do here */
> +}
> +
>  
>  /* Parses an assignment or exchange or put_dhcp_opts action. */
>  static void
> @@ -1685,6 +1785,8 @@ parse_action(struct action_context *ctx)
>          parse_get_mac_bind(ctx, 128, ovnact_put_GET_ND(ctx->ovnacts));
>      } else if (lexer_match_id(ctx->lexer, "put_nd")) {
>          parse_put_mac_bind(ctx, 128, ovnact_put_PUT_ND(ctx->ovnacts));
> +    } else if (lexer_match_id(ctx->lexer, "sample")) {
> +    parse_SAMPLE(ctx);
>      } else {
>          lexer_syntax_error(ctx->lexer, "expecting action");
>      }
> diff --git a/ovn/northd/ovn-northd.c b/ovn/northd/ovn-northd.c
> index eeeb41d..fd3616e 100644
> --- a/ovn/northd/ovn-northd.c
> +++ b/ovn/northd/ovn-northd.c
> @@ -110,6 +110,7 @@ enum ovn_stage {
>      PIPELINE_STAGE(SWITCH, IN,  DHCP_OPTIONS,   10,
> "ls_in_dhcp_options") \
>      PIPELINE_STAGE(SWITCH, IN,  DHCP_RESPONSE,  11,
> "ls_in_dhcp_response") \
>      PIPELINE_STAGE(SWITCH, IN,  L2_LKUP,        12, "ls_in_l2_lkup")    \
> +    PIPELINE_STAGE(SWITCH, IN,  SAMPLE,         13, "ls_in_sample")      \
>                                                                        \
>      /* Logical switch egress stages. */                               \
>      PIPELINE_STAGE(SWITCH, OUT, PRE_LB,       0, "ls_out_pre_lb")     \
> @@ -120,6 +121,7 @@ enum ovn_stage {
>      PIPELINE_STAGE(SWITCH, OUT, STATEFUL,     5, "ls_out_stateful")    \
>      PIPELINE_STAGE(SWITCH, OUT, PORT_SEC_IP,  6, "ls_out_port_sec_ip")
>    \
>      PIPELINE_STAGE(SWITCH, OUT, PORT_SEC_L2,  7, "ls_out_port_sec_l2")
>    \
> +    PIPELINE_STAGE(SWITCH, OUT, SAMPLE,       8, "ls_out_sample")      \
>                                                                        \
>      /* Logical router ingress stages. */                              \
>      PIPELINE_STAGE(ROUTER, IN,  ADMISSION,   0, "lr_in_admission")    \
> @@ -2591,6 +2593,24 @@ build_stateful(struct ovn_datapath *od, struct
> hmap *lflows)
>  }
>   static void
> +format_sample(struct ds *actions, const struct nbrec_ipfix_options
> *ipfix_opts)
> +{
> +    ds_put_format(actions,
> +              "sample(collector_set_id=%" PRIu64 ", probability=%" PRIu64,
> +          ipfix_opts->collector_set_id, ipfix_opts->sampling);
> +
> +    if (ipfix_opts->obs_domain_id)
> +    ds_put_format(actions,
> +              ", obs_domain_id=%" PRIu64, *ipfix_opts->obs_domain_id);
> +
> +    if (ipfix_opts->obs_point_id)
> +    ds_put_format(actions,
> +              ", obs_point_id=%" PRIu64, *ipfix_opts->obs_point_id);
> +
> +    ds_put_cstr(actions, "); ");
> +}
> +
> +static void
>  build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
>                      struct hmap *lflows, struct hmap *mcgroups)
>  {
> @@ -2920,11 +2940,12 @@ build_lswitch_flows(struct hmap *datapaths,
> struct hmap *ports,
>                                ETH_ADDR_ARGS(mac));
>                   ds_clear(&actions);
> -                ds_put_format(&actions, "outport = %s; output;",
> op->json_key);
> +                ds_put_format(&actions, "outport = %s; next;",
> op->json_key);
>                  ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 50,
>                                ds_cstr(&match), ds_cstr(&actions));
>              } else if (!strcmp(op->nbsp->addresses[i], "unknown")) {
>                  if (lsp_is_enabled(op->nbsp)) {
> +            /* TODO: Consider sampling here as well */
>                      ovn_multicast_add(mcgroups, &mc_unknown, op);
>                      op->od->has_unknown = true;
>                  }
> @@ -2939,7 +2960,7 @@ build_lswitch_flows(struct hmap *datapaths, struct
> hmap *ports,
>                                ETH_ADDR_ARGS(mac));
>                   ds_clear(&actions);
> -                ds_put_format(&actions, "outport = %s; output;",
> op->json_key);
> +                ds_put_format(&actions, "outport = %s; next;",
> op->json_key);
>                  ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 50,
>                                ds_cstr(&match), ds_cstr(&actions));
>              } else {
> @@ -2960,8 +2981,32 @@ build_lswitch_flows(struct hmap *datapaths,
> struct hmap *ports,
>           if (od->has_unknown) {
>              ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 0, "1",
> -                          "outport = \""MC_UNKNOWN"\"; output;");
> +                          "outport = \""MC_UNKNOWN"\"; next;");
> +        }
> +    }
> +
> +    /* Ingress table 13: Sampling (priority 50) */
> +    HMAP_FOR_EACH (op, key_node, ports) {
> +        if (!op->nbsp || !op->nbsp->ipfix_options) {
> +            continue;
> +        }
> +
> +    ds_clear(&match);
> +    ds_put_format(&match, "inport == %s", op->json_key);
> +
> +    ds_clear(&actions);
> +    format_sample(&actions, op->nbsp->ipfix_options);
> +    ds_put_format(&actions, "output;");
> +    ovn_lflow_add(lflows, op->od, S_SWITCH_IN_SAMPLE, 50,
> +              ds_cstr(&match), ds_cstr(&actions));
> +    }
> +
> +    /* Ingress table 13: Sampling (priority 0) - default output action */
> +    HMAP_FOR_EACH (od, key_node, datapaths) {
> +        if (!od->nbs) {
> +            continue;
>          }
> +    ovn_lflow_add(lflows, od, S_SWITCH_IN_SAMPLE, 0, "1", "output;");
>      }
>       /* Egress tables 6: Egress port security - IP (priority 0)
> @@ -2997,7 +3042,7 @@ build_lswitch_flows(struct hmap *datapaths, struct
> hmap *ports,
>              build_port_security_l2("eth.dst", op->ps_addrs,
> op->n_ps_addrs,
>                                     &match);
>              ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC_L2, 50,
> -                          ds_cstr(&match), "output;");
> +                          ds_cstr(&match), "next;");
>          } else {
>              ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC_L2, 150,
>                            ds_cstr(&match), "drop;");
> @@ -3008,6 +3053,30 @@ build_lswitch_flows(struct hmap *datapaths,
> struct hmap *ports,
>          }
>      }
>  +    /* Egress table 8: Sampling (priority 50) */
> +    HMAP_FOR_EACH (op, key_node, ports) {
> +        if (!op->nbsp || !op->nbsp->ipfix_options) {
> +            continue;
> +        }
> +
> +    ds_clear(&match);
> +    ds_put_format(&match, "outport == %s", op->json_key);
> +
> +    ds_clear(&actions);
> +    format_sample(&actions, op->nbsp->ipfix_options);
> +    ds_put_format(&actions, "output;");
> +    ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_SAMPLE, 50,
> +              ds_cstr(&match), ds_cstr(&actions));
> +    }
> +
> +    /* Egress table 8: Sampling (priority 0) - default output action */
> +    HMAP_FOR_EACH (od, key_node, datapaths) {
> +        if (!od->nbs) {
> +            continue;
> +        }
> +    ovn_lflow_add(lflows, od, S_SWITCH_OUT_SAMPLE, 0, "1", "output;");
> +    }
> +
>      ds_destroy(&match);
>      ds_destroy(&actions);
>  }
> diff --git a/ovn/ovn-nb.ovsschema b/ovn/ovn-nb.ovsschema
> index b7e70aa..fa95e89 100644
> --- a/ovn/ovn-nb.ovsschema
> +++ b/ovn/ovn-nb.ovsschema
> @@ -1,7 +1,7 @@
>  {
>      "name": "OVN_Northbound",
> -    "version": "5.3.3",
> -    "cksum": "2442952958 9945",
> +    "version": "5.3.4",
> +    "cksum": "859559804 10811",
>      "tables": {
>          "NB_Global": {
>              "columns": {
> @@ -79,6 +79,11 @@
>                                              "refType": "weak"},
>                                   "min": 0,
>                                   "max": 1}},
> +                "ipfix_options": {"type": {"key": {"type": "uuid",
> +                                           "refTable": "IPFIX_Options",
> +                                           "refType": "weak"},
> +                                 "min": 0,
> +                                 "max": 1}},
>                  "external_ids": {
>                      "type": {"key": "string", "value": "string",
>                               "min": 0, "max": "unlimited"}}},
> @@ -194,6 +199,27 @@
>                  "external_ids": {
>                      "type": {"key": "string", "value": "string",
>                               "min": 0, "max": "unlimited"}}},
> -            "isRoot": true}
> +            "isRoot": true},
> +    "IPFIX_Options": {
> +        "columns": {
> +        "collector_set_id": {
> +            "type": {"key": {"type": "integer",
> +            "minInteger": 0,
> +            "maxInteger": 4294967295}}},
> +        "sampling": {
> +            "type": {"key": {"type": "integer",
> +            "minInteger": 1,
> +            "maxInteger": 65535}}},
> +        "obs_domain_id": {
> +            "type": {"key": {"type": "integer",
> +            "minInteger": 0,
> +            "maxInteger": 4294967295},
> +            "min": 0, "max": 1}},
> +        "obs_point_id": {
> +            "type": {"key": {"type": "integer",
> +            "minInteger": 0,
> +            "maxInteger": 4294967295},
> +            "min": 0, "max": 1}}},
> +                "isRoot": true}
>      }
>  }
> diff --git a/ovn/ovn-nb.xml b/ovn/ovn-nb.xml
> index c45a444..382b0ca 100644
> --- a/ovn/ovn-nb.xml
> +++ b/ovn/ovn-nb.xml
> @@ -629,6 +629,12 @@
>          See <em>External IDs</em> at the beginning of this document.
>        </column>
>      </group>
> +
> +    <column name="ipfix_options">
> +      This column defines the IPFIX options (see <ref
> table="IPFIX_Options"/>
> +      table) for this port to use. If unset, no IPFIX flows are
> genreated for
> +      traffic on this logical port.
> +    </column>
>    </table>
>     <table name="Address_Set" title="Address Sets">
> @@ -1356,4 +1362,34 @@
>        </column>
>      </group>
>    </table>
> +
> +  <table name="IPFIX_Options" title="IPFIX Options">
> +    <p>
> +      This table allows to configure IPFIX flow sample at logical
> switch or
> +      port level. Currently, OVN doesn't create
> Flow_Sample_Collector_Set or
> +      IPFIX entries at OVS where physical ports actually reside. This is
> +      considered to be a CMS task.
> +    </p>
> +
> +    <column name="collector_set_id">
> +      The Flow_Sample_Collector_Set entry identifier on target OVS
> instance.
> +      This is an integer value between 0 and 2^32-1. Also see the note
> above.
> +    </column>
> +
> +    <column name="sampling">
> +      The rate at which packets should be sampled and sent to each target
> +      collector. Setting it to N means that in average, one of N packets
> +      will be sampled.
> +    </column>
> +
> +    <column name="obs_domain_id">
> +      The IPFIX Observation Domain ID sent in each IPFIX packet.  If not
> +      specified, defaults to 0.
> +    </column>
> +
> +    <column name="obs_point_id">
> +      The IPFIX Observation Point ID sent in each IPFIX flow record. If
> not
> +      specified, defaults to 0.
> +    </column>
> +  </table>
>  </database>
> diff --git a/ovn/utilities/ovn-nbctl.c b/ovn/utilities/ovn-nbctl.c
> index 2148665..d112614 100644
> --- a/ovn/utilities/ovn-nbctl.c
> +++ b/ovn/utilities/ovn-nbctl.c
> @@ -2163,6 +2163,11 @@ static const struct ctl_table_class tables[] = {
>         NULL},
>        {NULL, NULL, NULL}}},
>  +    {&nbrec_table_ipfix_options,
> +     {{&nbrec_table_ipfix_options, NULL,
> +       NULL},
> +      {NULL, NULL, NULL}}},
> +
>      {NULL, {{NULL, NULL, NULL}, {NULL, NULL, NULL}}}
>  };
>  
> diff --git a/ovn/utilities/ovn-trace.c b/ovn/utilities/ovn-trace.c
> index f5607df..f31301c 100644
> --- a/ovn/utilities/ovn-trace.c
> +++ b/ovn/utilities/ovn-trace.c
> @@ -1289,6 +1289,10 @@ trace_actions(const struct ovnact *ovnacts,
> size_t ovnacts_len,
>          case OVNACT_PUT_DHCPV6_OPTS:
>              execute_put_dhcp_opts(ovnact_get_PUT_DHCPV6_OPTS(a), uflow);
>              break;
> +
> +    case OVNACT_SAMPLE:
> +        /* Nothing to do for tracing */
> +        break;
>          }
>       }
Valentine Sinitsyn Oct. 21, 2016, 7:58 a.m. UTC | #2
Hi all,

Ping here :)

During last few weeks I posted several questions regarding IPFIX 
implementation in OVS, and also a patch to improve (from my POV of 
course) the state of things. None of them yielded any responses, 
unfortunately. Am I doing anything wrong (such as violating some 
guidelines), or there is just a little interest in using OVS for IPFIX 
export?

Best,
Valentine

On 14.10.2016 16:35, Valentine Sinitsyn wrote:
> Hi all,
>
> This is a quick attempt to implement sample action at logical port
> level.The goal is to export IPFIX flows for logical ports, yet it is
> easy to extend this approach to logical switches as well.
>
> Nothing is done to provision OVS instances with required
> Flow_Sample_Collector_Set and IPFIX entries at this point.
>
> Does the approach I take looks sensible? If so, I can add tests and
> re-send this patch for in-depth review.
>
> Many thanks.
>
> Signed-off-by: Valentine Sinitsyn <valentine.sinitsyn@gmail.com>
>
> ---
>  include/ovn/actions.h     |  12 +++++-
>  ovn/lib/actions.c         | 102
> ++++++++++++++++++++++++++++++++++++++++++++++
>  ovn/northd/ovn-northd.c   |  77 ++++++++++++++++++++++++++++++++--
>  ovn/ovn-nb.ovsschema      |  32 +++++++++++++--
>  ovn/ovn-nb.xml            |  36 ++++++++++++++++
>  ovn/utilities/ovn-nbctl.c |   5 +++
>  ovn/utilities/ovn-trace.c |   4 ++
>  7 files changed, 260 insertions(+), 8 deletions(-)
>
> diff --git a/include/ovn/actions.h b/include/ovn/actions.h
> index 2905937..7dc4bb9 100644
> --- a/include/ovn/actions.h
> +++ b/include/ovn/actions.h
> @@ -66,7 +66,8 @@ struct simap;
>      OVNACT(GET_ND,        ovnact_get_mac_bind)      \
>      OVNACT(PUT_ND,        ovnact_put_mac_bind)      \
>      OVNACT(PUT_DHCPV4_OPTS, ovnact_put_dhcp_opts)   \
> -    OVNACT(PUT_DHCPV6_OPTS, ovnact_put_dhcp_opts)
> +    OVNACT(PUT_DHCPV6_OPTS, ovnact_put_dhcp_opts)   \
> +    OVNACT(SAMPLE,      ovnact_sample)
>   /* enum ovnact_type, with a member OVNACT_<ENUM> for each action. */
>  enum OVS_PACKED_ENUM ovnact_type {
> @@ -219,6 +220,15 @@ struct ovnact_put_dhcp_opts {
>      size_t n_options;
>  };
>  +/* OVNACT_SAMPLE */
> +struct ovnact_sample {
> +    struct ovnact ovnact;
> +    uint32_t collector_set_id;
> +    uint32_t probability;
> +    uint32_t obs_domain_id;
> +    uint32_t obs_point_id;
> +};
> +
>  /* Internal use by the helpers below. */
>  void ovnact_init(struct ovnact *, enum ovnact_type, size_t len);
>  void *ovnact_put(struct ofpbuf *, enum ovnact_type, size_t len);
> diff --git a/ovn/lib/actions.c b/ovn/lib/actions.c
> index 59131dd..e1debb3 100644
> --- a/ovn/lib/actions.c
> +++ b/ovn/lib/actions.c
> @@ -1614,6 +1614,106 @@ free_PUT_DHCPV6_OPTS(struct ovnact_put_dhcp_opts
> *pdo)
>  {
>      free_put_dhcp_opts(pdo);
>  }
> +
> +static bool
> +parse_sample_arg_get_int_helper(struct lexer *lexer, uint32_t *out)
> +{
> +    if (!lexer_force_match(lexer, LEX_T_EQUALS)) {
> +    return false;
> +    }
> +    if (lexer->token.type == LEX_T_INTEGER) {
> +    *out = ntohl(lexer->token.value.be32_int);
> +    } else {
> +    lexer_syntax_error(lexer, "expecting integer");
> +    return false;
> +    }
> +    lexer_get(lexer);
> +    return true;
> +}
> +
> +static void
> +parse_sample_args(struct action_context *ctx,
> +                  struct ovnact_sample *cc)
> +{
> +    bool ok;
> +
> +    if (lexer_match_id(ctx->lexer, "collector_set_id")) {
> +    ok = parse_sample_arg_get_int_helper(ctx->lexer,
> +                                         &cc->collector_set_id);
> +    if (!ok)
> +        return;
> +    } else if (lexer_match_id(ctx->lexer, "probability")) {
> +    ok = parse_sample_arg_get_int_helper(ctx->lexer,
> +                                         &cc->probability);
> +    if (!ok)
> +        return;
> +    } else if (lexer_match_id(ctx->lexer, "obs_domain_id")) {
> +    ok = parse_sample_arg_get_int_helper(ctx->lexer,
> +                                         &cc->obs_domain_id);
> +    if (!ok)
> +        return;
> +    } else if (lexer_match_id(ctx->lexer, "obs_point_id")) {
> +    ok = parse_sample_arg_get_int_helper(ctx->lexer,
> +                                         &cc->obs_point_id);
> +    if (!ok)
> +        return;
> +    } else {
> +        lexer_syntax_error(ctx->lexer, NULL);
> +    }
> +}
> +
> +static void
> +parse_SAMPLE(struct action_context *ctx)
> +{
> +    struct ovnact_sample *sample = ovnact_put_SAMPLE(ctx->ovnacts);
> +    if (lexer_match(ctx->lexer, LEX_T_LPAREN)) {
> +        while (!lexer_match(ctx->lexer, LEX_T_RPAREN)) {
> +            parse_sample_args(ctx, sample);
> +            if (ctx->lexer->error) {
> +                return;
> +            }
> +            lexer_match(ctx->lexer, LEX_T_COMMA);
> +        }
> +    }
> +
> +    if (!sample->probability)
> +    lexer_error(ctx->lexer, "sample probability can't be zero");
> +}
> +
> +static void
> +encode_SAMPLE(const struct ovnact_sample *sample,
> +              const struct ovnact_encode_params *ep OVS_UNUSED,
> +              struct ofpbuf *ofpacts)
> +{
> +    struct ofpact_sample *action = ofpact_put_SAMPLE(ofpacts);
> +    action->collector_set_id = sample->collector_set_id;
> +    /* sampling is 16 bit wide in the northbound database */
> +    action->probability = (uint16_t)sample->probability;
> +    action->obs_domain_id = sample->obs_domain_id;
> +    action->obs_point_id = sample->obs_point_id;
> +    action->sampling_port = OFPP_NONE;
> +}
> +
> +static void
> +format_SAMPLE(const struct ovnact_sample *sample, struct ds *s)
> +{
> +    ds_put_format(s,
> +              "sample(collector_set_id=%" PRIu32 \
> +          ", probability=%" PRIu32 \
> +          ", obs_domain_id=%" PRIu32 \
> +          ", obs_point_id=%" PRIu32 ")",
> +          sample->collector_set_id,
> +          sample->probability,
> +          sample->obs_domain_id,
> +          sample->obs_point_id);
> +}
> +
> +static void
> +free_SAMPLE(struct ovnact_sample *sample OVS_UNUSED)
> +{
> +    /* Nothing to do here */
> +}
> +
>  
>  /* Parses an assignment or exchange or put_dhcp_opts action. */
>  static void
> @@ -1685,6 +1785,8 @@ parse_action(struct action_context *ctx)
>          parse_get_mac_bind(ctx, 128, ovnact_put_GET_ND(ctx->ovnacts));
>      } else if (lexer_match_id(ctx->lexer, "put_nd")) {
>          parse_put_mac_bind(ctx, 128, ovnact_put_PUT_ND(ctx->ovnacts));
> +    } else if (lexer_match_id(ctx->lexer, "sample")) {
> +    parse_SAMPLE(ctx);
>      } else {
>          lexer_syntax_error(ctx->lexer, "expecting action");
>      }
> diff --git a/ovn/northd/ovn-northd.c b/ovn/northd/ovn-northd.c
> index eeeb41d..fd3616e 100644
> --- a/ovn/northd/ovn-northd.c
> +++ b/ovn/northd/ovn-northd.c
> @@ -110,6 +110,7 @@ enum ovn_stage {
>      PIPELINE_STAGE(SWITCH, IN,  DHCP_OPTIONS,   10,
> "ls_in_dhcp_options") \
>      PIPELINE_STAGE(SWITCH, IN,  DHCP_RESPONSE,  11,
> "ls_in_dhcp_response") \
>      PIPELINE_STAGE(SWITCH, IN,  L2_LKUP,        12, "ls_in_l2_lkup")    \
> +    PIPELINE_STAGE(SWITCH, IN,  SAMPLE,         13, "ls_in_sample")      \
>                                                                        \
>      /* Logical switch egress stages. */                               \
>      PIPELINE_STAGE(SWITCH, OUT, PRE_LB,       0, "ls_out_pre_lb")     \
> @@ -120,6 +121,7 @@ enum ovn_stage {
>      PIPELINE_STAGE(SWITCH, OUT, STATEFUL,     5, "ls_out_stateful")    \
>      PIPELINE_STAGE(SWITCH, OUT, PORT_SEC_IP,  6, "ls_out_port_sec_ip")
>    \
>      PIPELINE_STAGE(SWITCH, OUT, PORT_SEC_L2,  7, "ls_out_port_sec_l2")
>    \
> +    PIPELINE_STAGE(SWITCH, OUT, SAMPLE,       8, "ls_out_sample")      \
>                                                                        \
>      /* Logical router ingress stages. */                              \
>      PIPELINE_STAGE(ROUTER, IN,  ADMISSION,   0, "lr_in_admission")    \
> @@ -2591,6 +2593,24 @@ build_stateful(struct ovn_datapath *od, struct
> hmap *lflows)
>  }
>   static void
> +format_sample(struct ds *actions, const struct nbrec_ipfix_options
> *ipfix_opts)
> +{
> +    ds_put_format(actions,
> +              "sample(collector_set_id=%" PRIu64 ", probability=%" PRIu64,
> +          ipfix_opts->collector_set_id, ipfix_opts->sampling);
> +
> +    if (ipfix_opts->obs_domain_id)
> +    ds_put_format(actions,
> +              ", obs_domain_id=%" PRIu64, *ipfix_opts->obs_domain_id);
> +
> +    if (ipfix_opts->obs_point_id)
> +    ds_put_format(actions,
> +              ", obs_point_id=%" PRIu64, *ipfix_opts->obs_point_id);
> +
> +    ds_put_cstr(actions, "); ");
> +}
> +
> +static void
>  build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
>                      struct hmap *lflows, struct hmap *mcgroups)
>  {
> @@ -2920,11 +2940,12 @@ build_lswitch_flows(struct hmap *datapaths,
> struct hmap *ports,
>                                ETH_ADDR_ARGS(mac));
>                   ds_clear(&actions);
> -                ds_put_format(&actions, "outport = %s; output;",
> op->json_key);
> +                ds_put_format(&actions, "outport = %s; next;",
> op->json_key);
>                  ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 50,
>                                ds_cstr(&match), ds_cstr(&actions));
>              } else if (!strcmp(op->nbsp->addresses[i], "unknown")) {
>                  if (lsp_is_enabled(op->nbsp)) {
> +            /* TODO: Consider sampling here as well */
>                      ovn_multicast_add(mcgroups, &mc_unknown, op);
>                      op->od->has_unknown = true;
>                  }
> @@ -2939,7 +2960,7 @@ build_lswitch_flows(struct hmap *datapaths, struct
> hmap *ports,
>                                ETH_ADDR_ARGS(mac));
>                   ds_clear(&actions);
> -                ds_put_format(&actions, "outport = %s; output;",
> op->json_key);
> +                ds_put_format(&actions, "outport = %s; next;",
> op->json_key);
>                  ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 50,
>                                ds_cstr(&match), ds_cstr(&actions));
>              } else {
> @@ -2960,8 +2981,32 @@ build_lswitch_flows(struct hmap *datapaths,
> struct hmap *ports,
>           if (od->has_unknown) {
>              ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 0, "1",
> -                          "outport = \""MC_UNKNOWN"\"; output;");
> +                          "outport = \""MC_UNKNOWN"\"; next;");
> +        }
> +    }
> +
> +    /* Ingress table 13: Sampling (priority 50) */
> +    HMAP_FOR_EACH (op, key_node, ports) {
> +        if (!op->nbsp || !op->nbsp->ipfix_options) {
> +            continue;
> +        }
> +
> +    ds_clear(&match);
> +    ds_put_format(&match, "inport == %s", op->json_key);
> +
> +    ds_clear(&actions);
> +    format_sample(&actions, op->nbsp->ipfix_options);
> +    ds_put_format(&actions, "output;");
> +    ovn_lflow_add(lflows, op->od, S_SWITCH_IN_SAMPLE, 50,
> +              ds_cstr(&match), ds_cstr(&actions));
> +    }
> +
> +    /* Ingress table 13: Sampling (priority 0) - default output action */
> +    HMAP_FOR_EACH (od, key_node, datapaths) {
> +        if (!od->nbs) {
> +            continue;
>          }
> +    ovn_lflow_add(lflows, od, S_SWITCH_IN_SAMPLE, 0, "1", "output;");
>      }
>       /* Egress tables 6: Egress port security - IP (priority 0)
> @@ -2997,7 +3042,7 @@ build_lswitch_flows(struct hmap *datapaths, struct
> hmap *ports,
>              build_port_security_l2("eth.dst", op->ps_addrs,
> op->n_ps_addrs,
>                                     &match);
>              ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC_L2, 50,
> -                          ds_cstr(&match), "output;");
> +                          ds_cstr(&match), "next;");
>          } else {
>              ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC_L2, 150,
>                            ds_cstr(&match), "drop;");
> @@ -3008,6 +3053,30 @@ build_lswitch_flows(struct hmap *datapaths,
> struct hmap *ports,
>          }
>      }
>  +    /* Egress table 8: Sampling (priority 50) */
> +    HMAP_FOR_EACH (op, key_node, ports) {
> +        if (!op->nbsp || !op->nbsp->ipfix_options) {
> +            continue;
> +        }
> +
> +    ds_clear(&match);
> +    ds_put_format(&match, "outport == %s", op->json_key);
> +
> +    ds_clear(&actions);
> +    format_sample(&actions, op->nbsp->ipfix_options);
> +    ds_put_format(&actions, "output;");
> +    ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_SAMPLE, 50,
> +              ds_cstr(&match), ds_cstr(&actions));
> +    }
> +
> +    /* Egress table 8: Sampling (priority 0) - default output action */
> +    HMAP_FOR_EACH (od, key_node, datapaths) {
> +        if (!od->nbs) {
> +            continue;
> +        }
> +    ovn_lflow_add(lflows, od, S_SWITCH_OUT_SAMPLE, 0, "1", "output;");
> +    }
> +
>      ds_destroy(&match);
>      ds_destroy(&actions);
>  }
> diff --git a/ovn/ovn-nb.ovsschema b/ovn/ovn-nb.ovsschema
> index b7e70aa..fa95e89 100644
> --- a/ovn/ovn-nb.ovsschema
> +++ b/ovn/ovn-nb.ovsschema
> @@ -1,7 +1,7 @@
>  {
>      "name": "OVN_Northbound",
> -    "version": "5.3.3",
> -    "cksum": "2442952958 9945",
> +    "version": "5.3.4",
> +    "cksum": "859559804 10811",
>      "tables": {
>          "NB_Global": {
>              "columns": {
> @@ -79,6 +79,11 @@
>                                              "refType": "weak"},
>                                   "min": 0,
>                                   "max": 1}},
> +                "ipfix_options": {"type": {"key": {"type": "uuid",
> +                                           "refTable": "IPFIX_Options",
> +                                           "refType": "weak"},
> +                                 "min": 0,
> +                                 "max": 1}},
>                  "external_ids": {
>                      "type": {"key": "string", "value": "string",
>                               "min": 0, "max": "unlimited"}}},
> @@ -194,6 +199,27 @@
>                  "external_ids": {
>                      "type": {"key": "string", "value": "string",
>                               "min": 0, "max": "unlimited"}}},
> -            "isRoot": true}
> +            "isRoot": true},
> +    "IPFIX_Options": {
> +        "columns": {
> +        "collector_set_id": {
> +            "type": {"key": {"type": "integer",
> +            "minInteger": 0,
> +            "maxInteger": 4294967295}}},
> +        "sampling": {
> +            "type": {"key": {"type": "integer",
> +            "minInteger": 1,
> +            "maxInteger": 65535}}},
> +        "obs_domain_id": {
> +            "type": {"key": {"type": "integer",
> +            "minInteger": 0,
> +            "maxInteger": 4294967295},
> +            "min": 0, "max": 1}},
> +        "obs_point_id": {
> +            "type": {"key": {"type": "integer",
> +            "minInteger": 0,
> +            "maxInteger": 4294967295},
> +            "min": 0, "max": 1}}},
> +                "isRoot": true}
>      }
>  }
> diff --git a/ovn/ovn-nb.xml b/ovn/ovn-nb.xml
> index c45a444..382b0ca 100644
> --- a/ovn/ovn-nb.xml
> +++ b/ovn/ovn-nb.xml
> @@ -629,6 +629,12 @@
>          See <em>External IDs</em> at the beginning of this document.
>        </column>
>      </group>
> +
> +    <column name="ipfix_options">
> +      This column defines the IPFIX options (see <ref
> table="IPFIX_Options"/>
> +      table) for this port to use. If unset, no IPFIX flows are
> genreated for
> +      traffic on this logical port.
> +    </column>
>    </table>
>     <table name="Address_Set" title="Address Sets">
> @@ -1356,4 +1362,34 @@
>        </column>
>      </group>
>    </table>
> +
> +  <table name="IPFIX_Options" title="IPFIX Options">
> +    <p>
> +      This table allows to configure IPFIX flow sample at logical
> switch or
> +      port level. Currently, OVN doesn't create
> Flow_Sample_Collector_Set or
> +      IPFIX entries at OVS where physical ports actually reside. This is
> +      considered to be a CMS task.
> +    </p>
> +
> +    <column name="collector_set_id">
> +      The Flow_Sample_Collector_Set entry identifier on target OVS
> instance.
> +      This is an integer value between 0 and 2^32-1. Also see the note
> above.
> +    </column>
> +
> +    <column name="sampling">
> +      The rate at which packets should be sampled and sent to each target
> +      collector. Setting it to N means that in average, one of N packets
> +      will be sampled.
> +    </column>
> +
> +    <column name="obs_domain_id">
> +      The IPFIX Observation Domain ID sent in each IPFIX packet.  If not
> +      specified, defaults to 0.
> +    </column>
> +
> +    <column name="obs_point_id">
> +      The IPFIX Observation Point ID sent in each IPFIX flow record. If
> not
> +      specified, defaults to 0.
> +    </column>
> +  </table>
>  </database>
> diff --git a/ovn/utilities/ovn-nbctl.c b/ovn/utilities/ovn-nbctl.c
> index 2148665..d112614 100644
> --- a/ovn/utilities/ovn-nbctl.c
> +++ b/ovn/utilities/ovn-nbctl.c
> @@ -2163,6 +2163,11 @@ static const struct ctl_table_class tables[] = {
>         NULL},
>        {NULL, NULL, NULL}}},
>  +    {&nbrec_table_ipfix_options,
> +     {{&nbrec_table_ipfix_options, NULL,
> +       NULL},
> +      {NULL, NULL, NULL}}},
> +
>      {NULL, {{NULL, NULL, NULL}, {NULL, NULL, NULL}}}
>  };
>  
> diff --git a/ovn/utilities/ovn-trace.c b/ovn/utilities/ovn-trace.c
> index f5607df..f31301c 100644
> --- a/ovn/utilities/ovn-trace.c
> +++ b/ovn/utilities/ovn-trace.c
> @@ -1289,6 +1289,10 @@ trace_actions(const struct ovnact *ovnacts,
> size_t ovnacts_len,
>          case OVNACT_PUT_DHCPV6_OPTS:
>              execute_put_dhcp_opts(ovnact_get_PUT_DHCPV6_OPTS(a), uflow);
>              break;
> +
> +    case OVNACT_SAMPLE:
> +        /* Nothing to do for tracing */
> +        break;
>          }
>       }
Ben Pfaff Nov. 29, 2016, 12:21 a.m. UTC | #3
On Fri, Oct 14, 2016 at 04:35:46PM +0500, Valentine Sinitsyn wrote:
> This is a quick attempt to implement sample action at logical port level.The
> goal is to export IPFIX flows for logical ports, yet it is easy to extend
> this approach to logical switches as well.
> 
> Nothing is done to provision OVS instances with required
> Flow_Sample_Collector_Set and IPFIX entries at this point.
> 
> Does the approach I take looks sensible? If so, I can add tests and re-send
> this patch for in-depth review.
> 
> Many thanks.
> 
> Signed-off-by: Valentine Sinitsyn <valentine.sinitsyn@gmail.com>

Sorry about the long delay in review.  It's been a difficult month.

This is pretty cool!  The integration among OVS and OVN and IPFIX is
graceful.

The part that worries me is the CMS integration.  Have you actually
built that integration already (for which CMS)?  I have two concerns.
First, I'd prefer to see at least one CMS (probably OpenStack) support
this at or around the time that it goes into OVN.  Second, I have some
skepticism around the idea that the CMS should configure the
Flow_Sample_Collector_Set, etc., because OVN doesn't currently require
the CMS to have any connectivity to OVSDB on each of the hypervisors and
this would require the CMS to add that support.

Do you have any thoughts about supporting other monitoring technology
that OVS supports (e.g. sFlow) using similar techniques?

I hope that the long delay does not discourage you from following up.  I
hope to be more responsive now.
Valentine Sinitsyn Nov. 29, 2016, 2:22 p.m. UTC | #4
Hi Ben,

On 29.11.2016 05:21, Ben Pfaff wrote:
> On Fri, Oct 14, 2016 at 04:35:46PM +0500, Valentine Sinitsyn wrote:
>> This is a quick attempt to implement sample action at logical port level.The
>> goal is to export IPFIX flows for logical ports, yet it is easy to extend
>> this approach to logical switches as well.
>>
>> Nothing is done to provision OVS instances with required
>> Flow_Sample_Collector_Set and IPFIX entries at this point.
>>
>> Does the approach I take looks sensible? If so, I can add tests and re-send
>> this patch for in-depth review.
>>
>> Many thanks.
>>
>> Signed-off-by: Valentine Sinitsyn <valentine.sinitsyn@gmail.com>
>
> Sorry about the long delay in review.  It's been a difficult month.
No problem.

>
> This is pretty cool!  The integration among OVS and OVN and IPFIX is
> graceful.
>
> The part that worries me is the CMS integration.  Have you actually
> built that integration already (for which CMS)?  I have two concerns.
> First, I'd prefer to see at least one CMS (probably OpenStack) support
> this at or around the time that it goes into OVN.  Second, I have some
> skepticism around the idea that the CMS should configure the
> Flow_Sample_Collector_Set, etc., because OVN doesn't currently require
> the CMS to have any connectivity to OVSDB on each of the hypervisors and
> this would require the CMS to add that support.
I agree that this particular bit is somewhat hacky. We plan to follow 
this route for an in-house CMS we build, but I doubt OpenStack community 
would pickup the idea. What alternatives do you see here? Having 
collector config at south db level doesn't seem clear either. Think I 
want to configure collector at 127.0.0.1:5900 - which localhost does 
this entry refer to?

If the sample() integration looks good, CMS assumptions aside, is there 
a chance to merge it as a stand-alone action? That's true no publicly 
available CMS would use it for a while, but when they decide to, the 
code would already be there. And the code is not dead, as we'll be using 
it as well.

>
> Do you have any thoughts about supporting other monitoring technology
> that OVS supports (e.g. sFlow) using similar techniques?
I haven't targeted any of them specifically, but it doesn't seem to be a 
daunting task. One only need some way to associate sample() instance and 
a sFlow receiver the same way collector_set_id does for IPFIX.

I'd suggest to generalize Flow_Sample_Collector_Set somehow, but we 
agreed configuring things through this table in OVN scenario is 
suboptimal. Any thoughts?

>
> I hope that the long delay does not discourage you from following up.  I
> hope to be more responsive now.
Sure, I'm open for the discussion.

Thanks,
Valentine

>
Ben Pfaff Nov. 30, 2016, 1:18 a.m. UTC | #5
On Tue, Nov 29, 2016 at 07:22:32PM +0500, Valentine Sinitsyn wrote:
> On 29.11.2016 05:21, Ben Pfaff wrote:
> >On Fri, Oct 14, 2016 at 04:35:46PM +0500, Valentine Sinitsyn wrote:
> >>This is a quick attempt to implement sample action at logical port
> >>level.The goal is to export IPFIX flows for logical ports, yet it is
> >>easy to extend this approach to logical switches as well.
> >>
> >>Nothing is done to provision OVS instances with required
> >>Flow_Sample_Collector_Set and IPFIX entries at this point.
>
> >This is pretty cool!  The integration among OVS and OVN and IPFIX is
> >graceful.
> >
> >The part that worries me is the CMS integration.  Have you actually
> >built that integration already (for which CMS)?  I have two concerns.
> >First, I'd prefer to see at least one CMS (probably OpenStack) support
> >this at or around the time that it goes into OVN.  Second, I have some
> >skepticism around the idea that the CMS should configure the
> >Flow_Sample_Collector_Set, etc., because OVN doesn't currently require
> >the CMS to have any connectivity to OVSDB on each of the hypervisors and
> >this would require the CMS to add that support.
> I agree that this particular bit is somewhat hacky. We plan to follow this
> route for an in-house CMS we build, but I doubt OpenStack community would
> pickup the idea. What alternatives do you see here? Having collector config
> at south db level doesn't seem clear either. Think I want to configure
> collector at 127.0.0.1:5900 - which localhost does this entry refer to?

Is this a common way to configure IPFIX?  I had been under the
impression that generally there's one or a few collectors in a network,
to which each switch forwards packets.  If it's common to use a
per-hypervisor collector, then that might actually makes thing easier,
since that would be easy for ovn-controller to configure into OVS on
each hypervisor.

Otherwise, I'm inclined to at least learn what the requirements would be
for common deployments of IPFIX.  Even if we don't implement it them (or
all of them), it's important to me to know what we're leaving out so
that what we add now is built in a way that it's gracefully extensible
later.

For example: if a packet should be sent to a collector, should the
collector be chosen based on the packet's logical network, or based on
the packet's physical network (the hypervisor it's ingressing or
egressing), or on some combination of those?

I also find myself wondering whether logical port level is the right
level at which to choose whether to sample packets.  Will OVN users want
finer-grained control over sampling and, if so, would it make more sense
to add an ACL-like table for that purpose at the northbound level?

> If the sample() integration looks good, CMS assumptions aside, is there a
> chance to merge it as a stand-alone action? That's true no publicly
> available CMS would use it for a while, but when they decide to, the code
> would already be there. And the code is not dead, as we'll be using it as
> well.

It's better than no users at all.

> >Do you have any thoughts about supporting other monitoring technology
> >that OVS supports (e.g. sFlow) using similar techniques?
> I haven't targeted any of them specifically, but it doesn't seem to be a
> daunting task. One only need some way to associate sample() instance and a
> sFlow receiver the same way collector_set_id does for IPFIX.
> 
> I'd suggest to generalize Flow_Sample_Collector_Set somehow, but we agreed
> configuring things through this table in OVN scenario is suboptimal. Any
> thoughts?

Did we have an earlier discussion?  I've spent a few minutes searching
my email archive and I don't see one.  If there was one, can you point
it out?

Thanks,

Ben.
Valentine Sinitsyn Nov. 30, 2016, 8:31 p.m. UTC | #6
On 30.11.2016 06:18, Ben Pfaff wrote:
> On Tue, Nov 29, 2016 at 07:22:32PM +0500, Valentine Sinitsyn wrote:
>> On 29.11.2016 05:21, Ben Pfaff wrote:
>>> On Fri, Oct 14, 2016 at 04:35:46PM +0500, Valentine Sinitsyn wrote:
>>>> This is a quick attempt to implement sample action at logical port
>>>> level.The goal is to export IPFIX flows for logical ports, yet it is
>>>> easy to extend this approach to logical switches as well.
>>>>
>>>> Nothing is done to provision OVS instances with required
>>>> Flow_Sample_Collector_Set and IPFIX entries at this point.
>>
>>> This is pretty cool!  The integration among OVS and OVN and IPFIX is
>>> graceful.
>>>
>>> The part that worries me is the CMS integration.  Have you actually
>>> built that integration already (for which CMS)?  I have two concerns.
>>> First, I'd prefer to see at least one CMS (probably OpenStack) support
>>> this at or around the time that it goes into OVN.  Second, I have some
>>> skepticism around the idea that the CMS should configure the
>>> Flow_Sample_Collector_Set, etc., because OVN doesn't currently require
>>> the CMS to have any connectivity to OVSDB on each of the hypervisors and
>>> this would require the CMS to add that support.
>> I agree that this particular bit is somewhat hacky. We plan to follow this
>> route for an in-house CMS we build, but I doubt OpenStack community would
>> pickup the idea. What alternatives do you see here? Having collector config
>> at south db level doesn't seem clear either. Think I want to configure
>> collector at 127.0.0.1:5900 - which localhost does this entry refer to?
>
> Is this a common way to configure IPFIX?  I had been under the
> impression that generally there's one or a few collectors in a network,
> to which each switch forwards packets.  If it's common to use a
> per-hypervisor collector, then that might actually makes thing easier,
> since that would be easy for ovn-controller to configure into OVS on
> each hypervisor.
Running collectors local to hypervisors is what we do here. I can't say 
if it's a common scenario, but given that IPFIX is most often UDP which 
can be lost, it usually makes sense to keep collectors and exporters as 
close as possible.

>
> Otherwise, I'm inclined to at least learn what the requirements would be
> for common deployments of IPFIX.  Even if we don't implement it them (or
> all of them), it's important to me to know what we're leaving out so
> that what we add now is built in a way that it's gracefully extensible
> later.
>
> For example: if a packet should be sent to a collector, should the
> collector be chosen based on the packet's logical network, or based on
> the packet's physical network (the hypervisor it's ingressing or
> egressing), or on some combination of those?
>
> I also find myself wondering whether logical port level is the right
> level at which to choose whether to sample packets.  Will OVN users want
> finer-grained control over sampling and, if so, would it make more sense
> to add an ACL-like table for that purpose at the northbound level?
You mean using an lflow match to control when the "sample" action will 
trigger, rather than hard-wiring these actions to logical ports via 
"ipfix_options"? This sounds reasonable and not to hard to implement, 
given that we already have these tables in the southbound db.

>
>> If the sample() integration looks good, CMS assumptions aside, is there a
>> chance to merge it as a stand-alone action? That's true no publicly
>> available CMS would use it for a while, but when they decide to, the code
>> would already be there. And the code is not dead, as we'll be using it as
>> well.
>
> It's better than no users at all.
>
>>> Do you have any thoughts about supporting other monitoring technology
>>> that OVS supports (e.g. sFlow) using similar techniques?
>> I haven't targeted any of them specifically, but it doesn't seem to be a
>> daunting task. One only need some way to associate sample() instance and a
>> sFlow receiver the same way collector_set_id does for IPFIX.
>>
>> I'd suggest to generalize Flow_Sample_Collector_Set somehow, but we agreed
>> configuring things through this table in OVN scenario is suboptimal. Any
>> thoughts?
>
> Did we have an earlier discussion?  I've spent a few minutes searching
> my email archive and I don't see one.  If there was one, can you point
> it out?
No, no prior discussion, sorry for being unclear.
I was referring to your concerns regarding CMS integration in the 
beginning of this thread.

Thanks,
Valentine
diff mbox

Patch

diff --git a/include/ovn/actions.h b/include/ovn/actions.h
index 2905937..7dc4bb9 100644
--- a/include/ovn/actions.h
+++ b/include/ovn/actions.h
@@ -66,7 +66,8 @@  struct simap;
      OVNACT(GET_ND,        ovnact_get_mac_bind)      \
      OVNACT(PUT_ND,        ovnact_put_mac_bind)      \
      OVNACT(PUT_DHCPV4_OPTS, ovnact_put_dhcp_opts)   \
-    OVNACT(PUT_DHCPV6_OPTS, ovnact_put_dhcp_opts)
+    OVNACT(PUT_DHCPV6_OPTS, ovnact_put_dhcp_opts)   \
+    OVNACT(SAMPLE,	  ovnact_sample)
   /* enum ovnact_type, with a member OVNACT_<ENUM> for each action. */
  enum OVS_PACKED_ENUM ovnact_type {
@@ -219,6 +220,15 @@  struct ovnact_put_dhcp_opts {
      size_t n_options;
  };
  +/* OVNACT_SAMPLE */
+struct ovnact_sample {
+    struct ovnact ovnact;
+    uint32_t collector_set_id;
+    uint32_t probability;
+    uint32_t obs_domain_id;
+    uint32_t obs_point_id;
+};
+
  /* Internal use by the helpers below. */
  void ovnact_init(struct ovnact *, enum ovnact_type, size_t len);
  void *ovnact_put(struct ofpbuf *, enum ovnact_type, size_t len);
diff --git a/ovn/lib/actions.c b/ovn/lib/actions.c
index 59131dd..e1debb3 100644
--- a/ovn/lib/actions.c
+++ b/ovn/lib/actions.c
@@ -1614,6 +1614,106 @@  free_PUT_DHCPV6_OPTS(struct ovnact_put_dhcp_opts 
*pdo)
  {
      free_put_dhcp_opts(pdo);
  }
+
+static bool
+parse_sample_arg_get_int_helper(struct lexer *lexer, uint32_t *out)
+{
+    if (!lexer_force_match(lexer, LEX_T_EQUALS)) {
+	return false;
+    }
+    if (lexer->token.type == LEX_T_INTEGER) {
+	*out = ntohl(lexer->token.value.be32_int);
+    } else {
+	lexer_syntax_error(lexer, "expecting integer");
+	return false;
+    }
+    lexer_get(lexer);
+    return true;
+}
+
+static void
+parse_sample_args(struct action_context *ctx,
+                  struct ovnact_sample *cc)
+{
+    bool ok;
+
+    if (lexer_match_id(ctx->lexer, "collector_set_id")) {
+	ok = parse_sample_arg_get_int_helper(ctx->lexer,
+	                                     &cc->collector_set_id);
+	if (!ok)
+	    return;
+    } else if (lexer_match_id(ctx->lexer, "probability")) {
+	ok = parse_sample_arg_get_int_helper(ctx->lexer,
+	                                     &cc->probability);
+	if (!ok)
+	    return;
+    } else if (lexer_match_id(ctx->lexer, "obs_domain_id")) {
+	ok = parse_sample_arg_get_int_helper(ctx->lexer,
+	                                     &cc->obs_domain_id);
+	if (!ok)
+	    return;
+    } else if (lexer_match_id(ctx->lexer, "obs_point_id")) {
+	ok = parse_sample_arg_get_int_helper(ctx->lexer,
+	                                     &cc->obs_point_id);
+	if (!ok)
+	    return;
+    } else {
+        lexer_syntax_error(ctx->lexer, NULL);
+    }
+}
+
+static void
+parse_SAMPLE(struct action_context *ctx)
+{
+    struct ovnact_sample *sample = ovnact_put_SAMPLE(ctx->ovnacts);
+    if (lexer_match(ctx->lexer, LEX_T_LPAREN)) {
+        while (!lexer_match(ctx->lexer, LEX_T_RPAREN)) {
+            parse_sample_args(ctx, sample);
+            if (ctx->lexer->error) {
+                return;
+            }
+            lexer_match(ctx->lexer, LEX_T_COMMA);
+        }
+    }
+
+    if (!sample->probability)
+	lexer_error(ctx->lexer, "sample probability can't be zero");
+}
+
+static void
+encode_SAMPLE(const struct ovnact_sample *sample,
+              const struct ovnact_encode_params *ep OVS_UNUSED,
+              struct ofpbuf *ofpacts)
+{
+    struct ofpact_sample *action = ofpact_put_SAMPLE(ofpacts);
+    action->collector_set_id = sample->collector_set_id;
+    /* sampling is 16 bit wide in the northbound database */
+    action->probability = (uint16_t)sample->probability;
+    action->obs_domain_id = sample->obs_domain_id;
+    action->obs_point_id = sample->obs_point_id;
+    action->sampling_port = OFPP_NONE;
+}
+
+static void
+format_SAMPLE(const struct ovnact_sample *sample, struct ds *s)
+{
+    ds_put_format(s,
+	          "sample(collector_set_id=%" PRIu32 \
+		  ", probability=%" PRIu32 \
+		  ", obs_domain_id=%" PRIu32 \
+		  ", obs_point_id=%" PRIu32 ")",
+		  sample->collector_set_id,
+		  sample->probability,
+		  sample->obs_domain_id,
+		  sample->obs_point_id);
+}
+
+static void
+free_SAMPLE(struct ovnact_sample *sample OVS_UNUSED)
+{
+    /* Nothing to do here */
+}
+
  
  /* Parses an assignment or exchange or put_dhcp_opts action. */
  static void
@@ -1685,6 +1785,8 @@  parse_action(struct action_context *ctx)
          parse_get_mac_bind(ctx, 128, ovnact_put_GET_ND(ctx->ovnacts));
      } else if (lexer_match_id(ctx->lexer, "put_nd")) {
          parse_put_mac_bind(ctx, 128, ovnact_put_PUT_ND(ctx->ovnacts));
+    } else if (lexer_match_id(ctx->lexer, "sample")) {
+	parse_SAMPLE(ctx);
      } else {
          lexer_syntax_error(ctx->lexer, "expecting action");
      }
diff --git a/ovn/northd/ovn-northd.c b/ovn/northd/ovn-northd.c
index eeeb41d..fd3616e 100644
--- a/ovn/northd/ovn-northd.c
+++ b/ovn/northd/ovn-northd.c
@@ -110,6 +110,7 @@  enum ovn_stage {
      PIPELINE_STAGE(SWITCH, IN,  DHCP_OPTIONS,   10, 
"ls_in_dhcp_options") \
      PIPELINE_STAGE(SWITCH, IN,  DHCP_RESPONSE,  11, 
"ls_in_dhcp_response") \
      PIPELINE_STAGE(SWITCH, IN,  L2_LKUP,        12, "ls_in_l2_lkup") 
    \
+    PIPELINE_STAGE(SWITCH, IN,  SAMPLE,         13, "ls_in_sample")      \
                                                                        \
      /* Logical switch egress stages. */                               \
      PIPELINE_STAGE(SWITCH, OUT, PRE_LB,       0, "ls_out_pre_lb")     \
@@ -120,6 +121,7 @@  enum ovn_stage {
      PIPELINE_STAGE(SWITCH, OUT, STATEFUL,     5, "ls_out_stateful") 
    \
      PIPELINE_STAGE(SWITCH, OUT, PORT_SEC_IP,  6, "ls_out_port_sec_ip") 
    \
      PIPELINE_STAGE(SWITCH, OUT, PORT_SEC_L2,  7, "ls_out_port_sec_l2") 
    \
+    PIPELINE_STAGE(SWITCH, OUT, SAMPLE,       8, "ls_out_sample")      \
                                                                        \
      /* Logical router ingress stages. */                              \
      PIPELINE_STAGE(ROUTER, IN,  ADMISSION,   0, "lr_in_admission")    \
@@ -2591,6 +2593,24 @@  build_stateful(struct ovn_datapath *od, struct 
hmap *lflows)
  }
   static void
+format_sample(struct ds *actions, const struct nbrec_ipfix_options 
*ipfix_opts)
+{
+    ds_put_format(actions,
+	          "sample(collector_set_id=%" PRIu64 ", probability=%" PRIu64,
+		  ipfix_opts->collector_set_id, ipfix_opts->sampling);
+
+    if (ipfix_opts->obs_domain_id)
+	ds_put_format(actions,
+		      ", obs_domain_id=%" PRIu64, *ipfix_opts->obs_domain_id);
+
+    if (ipfix_opts->obs_point_id)
+	ds_put_format(actions,
+		      ", obs_point_id=%" PRIu64, *ipfix_opts->obs_point_id);
+
+    ds_put_cstr(actions, "); ");
+}
+
+static void
  build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
                      struct hmap *lflows, struct hmap *mcgroups)
  {
@@ -2920,11 +2940,12 @@  build_lswitch_flows(struct hmap *datapaths, 
struct hmap *ports,
                                ETH_ADDR_ARGS(mac));
                   ds_clear(&actions);
-                ds_put_format(&actions, "outport = %s; output;", 
op->json_key);
+                ds_put_format(&actions, "outport = %s; next;", 
op->json_key);
                  ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 50,
                                ds_cstr(&match), ds_cstr(&actions));
              } else if (!strcmp(op->nbsp->addresses[i], "unknown")) {
                  if (lsp_is_enabled(op->nbsp)) {
+		    /* TODO: Consider sampling here as well */
                      ovn_multicast_add(mcgroups, &mc_unknown, op);
                      op->od->has_unknown = true;
                  }
@@ -2939,7 +2960,7 @@  build_lswitch_flows(struct hmap *datapaths, struct 
hmap *ports,
                                ETH_ADDR_ARGS(mac));
                   ds_clear(&actions);
-                ds_put_format(&actions, "outport = %s; output;", 
op->json_key);
+                ds_put_format(&actions, "outport = %s; next;", 
op->json_key);
                  ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 50,
                                ds_cstr(&match), ds_cstr(&actions));
              } else {
@@ -2960,8 +2981,32 @@  build_lswitch_flows(struct hmap *datapaths, 
struct hmap *ports,
           if (od->has_unknown) {
              ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 0, "1",
-                          "outport = \""MC_UNKNOWN"\"; output;");
+                          "outport = \""MC_UNKNOWN"\"; next;");
+        }
+    }
+
+    /* Ingress table 13: Sampling (priority 50) */
+    HMAP_FOR_EACH (op, key_node, ports) {
+        if (!op->nbsp || !op->nbsp->ipfix_options) {
+            continue;
+        }
+
+	ds_clear(&match);
+	ds_put_format(&match, "inport == %s", op->json_key);
+
+	ds_clear(&actions);
+	format_sample(&actions, op->nbsp->ipfix_options);
+	ds_put_format(&actions, "output;");
+	ovn_lflow_add(lflows, op->od, S_SWITCH_IN_SAMPLE, 50,
+		      ds_cstr(&match), ds_cstr(&actions));
+    }
+
+    /* Ingress table 13: Sampling (priority 0) - default output action */
+    HMAP_FOR_EACH (od, key_node, datapaths) {
+        if (!od->nbs) {
+            continue;
          }
+	ovn_lflow_add(lflows, od, S_SWITCH_IN_SAMPLE, 0, "1", "output;");
      }
       /* Egress tables 6: Egress port security - IP (priority 0)
@@ -2997,7 +3042,7 @@  build_lswitch_flows(struct hmap *datapaths, struct 
hmap *ports,
              build_port_security_l2("eth.dst", op->ps_addrs, 
op->n_ps_addrs,
                                     &match);
              ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC_L2, 50,
-                          ds_cstr(&match), "output;");
+                          ds_cstr(&match), "next;");
          } else {
              ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC_L2, 150,
                            ds_cstr(&match), "drop;");
@@ -3008,6 +3053,30 @@  build_lswitch_flows(struct hmap *datapaths, 
struct hmap *ports,
          }
      }
  +    /* Egress table 8: Sampling (priority 50) */
+    HMAP_FOR_EACH (op, key_node, ports) {
+        if (!op->nbsp || !op->nbsp->ipfix_options) {
+            continue;
+        }
+
+	ds_clear(&match);
+	ds_put_format(&match, "outport == %s", op->json_key);
+
+	ds_clear(&actions);
+	format_sample(&actions, op->nbsp->ipfix_options);
+	ds_put_format(&actions, "output;");
+	ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_SAMPLE, 50,
+		      ds_cstr(&match), ds_cstr(&actions));
+    }
+
+    /* Egress table 8: Sampling (priority 0) - default output action */
+    HMAP_FOR_EACH (od, key_node, datapaths) {
+        if (!od->nbs) {
+            continue;
+        }
+	ovn_lflow_add(lflows, od, S_SWITCH_OUT_SAMPLE, 0, "1", "output;");
+    }
+
      ds_destroy(&match);
      ds_destroy(&actions);
  }
diff --git a/ovn/ovn-nb.ovsschema b/ovn/ovn-nb.ovsschema
index b7e70aa..fa95e89 100644
--- a/ovn/ovn-nb.ovsschema
+++ b/ovn/ovn-nb.ovsschema
@@ -1,7 +1,7 @@ 
  {
      "name": "OVN_Northbound",
-    "version": "5.3.3",
-    "cksum": "2442952958 9945",
+    "version": "5.3.4",
+    "cksum": "859559804 10811",
      "tables": {
          "NB_Global": {
              "columns": {
@@ -79,6 +79,11 @@ 
                                              "refType": "weak"},
                                   "min": 0,
                                   "max": 1}},
+                "ipfix_options": {"type": {"key": {"type": "uuid",
+                                           "refTable": "IPFIX_Options",
+                                           "refType": "weak"},
+                                 "min": 0,
+                                 "max": 1}},
                  "external_ids": {
                      "type": {"key": "string", "value": "string",
                               "min": 0, "max": "unlimited"}}},
@@ -194,6 +199,27 @@ 
                  "external_ids": {
                      "type": {"key": "string", "value": "string",
                               "min": 0, "max": "unlimited"}}},
-            "isRoot": true}
+            "isRoot": true},
+	"IPFIX_Options": {
+	    "columns": {
+		"collector_set_id": {
+		    "type": {"key": {"type": "integer",
+			"minInteger": 0,
+			"maxInteger": 4294967295}}},
+		"sampling": {
+		    "type": {"key": {"type": "integer",
+			"minInteger": 1,
+			"maxInteger": 65535}}},
+		"obs_domain_id": {
+		    "type": {"key": {"type": "integer",
+			"minInteger": 0,
+			"maxInteger": 4294967295},
+		    "min": 0, "max": 1}},
+		"obs_point_id": {
+		    "type": {"key": {"type": "integer",
+			"minInteger": 0,
+			"maxInteger": 4294967295},
+		    "min": 0, "max": 1}}},
+                "isRoot": true}
      }
  }
diff --git a/ovn/ovn-nb.xml b/ovn/ovn-nb.xml
index c45a444..382b0ca 100644
--- a/ovn/ovn-nb.xml
+++ b/ovn/ovn-nb.xml
@@ -629,6 +629,12 @@ 
          See <em>External IDs</em> at the beginning of this document.
        </column>
      </group>
+
+    <column name="ipfix_options">
+      This column defines the IPFIX options (see <ref 
table="IPFIX_Options"/>
+      table) for this port to use. If unset, no IPFIX flows are 
genreated for
+      traffic on this logical port.
+    </column>
    </table>
     <table name="Address_Set" title="Address Sets">
@@ -1356,4 +1362,34 @@ 
        </column>
      </group>
    </table>
+
+  <table name="IPFIX_Options" title="IPFIX Options">
+    <p>
+      This table allows to configure IPFIX flow sample at logical switch or
+      port level. Currently, OVN doesn't create 
Flow_Sample_Collector_Set or
+      IPFIX entries at OVS where physical ports actually reside. This is
+      considered to be a CMS task.
+    </p>
+
+    <column name="collector_set_id">
+      The Flow_Sample_Collector_Set entry identifier on target OVS 
instance.
+      This is an integer value between 0 and 2^32-1. Also see the note 
above.
+    </column>
+
+    <column name="sampling">
+      The rate at which packets should be sampled and sent to each target
+      collector. Setting it to N means that in average, one of N packets
+      will be sampled.
+    </column>
+
+    <column name="obs_domain_id">
+      The IPFIX Observation Domain ID sent in each IPFIX packet.  If not
+      specified, defaults to 0.
+    </column>
+
+    <column name="obs_point_id">
+      The IPFIX Observation Point ID sent in each IPFIX flow record. 
If not
+      specified, defaults to 0.
+    </column>
+  </table>
  </database>
diff --git a/ovn/utilities/ovn-nbctl.c b/ovn/utilities/ovn-nbctl.c
index 2148665..d112614 100644
--- a/ovn/utilities/ovn-nbctl.c
+++ b/ovn/utilities/ovn-nbctl.c
@@ -2163,6 +2163,11 @@  static const struct ctl_table_class tables[] = {
         NULL},
        {NULL, NULL, NULL}}},
  +    {&nbrec_table_ipfix_options,
+     {{&nbrec_table_ipfix_options, NULL,
+       NULL},
+      {NULL, NULL, NULL}}},
+
      {NULL, {{NULL, NULL, NULL}, {NULL, NULL, NULL}}}
  };
  
diff --git a/ovn/utilities/ovn-trace.c b/ovn/utilities/ovn-trace.c
index f5607df..f31301c 100644
--- a/ovn/utilities/ovn-trace.c
+++ b/ovn/utilities/ovn-trace.c
@@ -1289,6 +1289,10 @@  trace_actions(const struct ovnact *ovnacts, 
size_t ovnacts_len,
          case OVNACT_PUT_DHCPV6_OPTS:
              execute_put_dhcp_opts(ovnact_get_PUT_DHCPV6_OPTS(a), uflow);
              break;
+
+	case OVNACT_SAMPLE:
+	    /* Nothing to do for tracing */
+	    break;
          }
       }