diff mbox series

[ovs-dev,2/2] northd: Provide the option to not use ct.inv in lflows.

Message ID 20210322105911.338961-1-numans@ovn.org
State Changes Requested
Headers show
Series [ovs-dev,1/2] northd: Optimize ct nat for load balancer traffic. | expand

Commit Message

Numan Siddique March 22, 2021, 10:59 a.m. UTC
From: Numan Siddique <numans@ovn.org>

Presently we add 65535 priority lflows in the stages -
'ls_in_acl' and 'ls_out_acl' to drop packets which
match on 'ct.inv'.

This patch provides an option to not use this field in the
logical flow matches for the following
reasons:

 • Some of the smart NICs which support offloading datapath
   flows may not support this field.  In such cases, almost
   all the datapath flows cannot be offloaded if ACLs/load balancers
   are configured on the logical switch datapath.

 • A recent commit in kernel ovs datapath sets the committed
   connection tracking entry to be liberal for out-of-window
   tcp packets (nf_ct_set_tcp_be_liberal()).  Such TCP
   packets will not be marked as invalid.

There are still certain scenarios where a packet can be marked
invalid.  Like
 • If the tcp flags are not correct.

 • If there are checksum errors.

In such cases, the packet will be delivered to the destination
port.  So CMS should evaluate their usecases before enabling
this option.

Signed-off-by: Numan Siddique <numans@ovn.org>
---
 NEWS                 |  5 +++
 northd/lswitch.dl    | 13 +++++++-
 northd/ovn-northd.c  | 41 ++++++++++++++---------
 northd/ovn_northd.dl | 51 +++++++++++++++++++++++------
 ovn-nb.xml           | 11 +++++++
 tests/ovn-northd.at  | 77 ++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 171 insertions(+), 27 deletions(-)

Comments

Numan Siddique March 24, 2021, 3:58 p.m. UTC | #1
Hi Ben,

Need your eye on this patch for the ddlog stuff.  I was wondering
if there is a better way to do than what I've done.

Not urgent. Please take your time.

Thanks
Numan

On Mon, Mar 22, 2021 at 4:29 PM <numans@ovn.org> wrote:
>
> From: Numan Siddique <numans@ovn.org>
>
> Presently we add 65535 priority lflows in the stages -
> 'ls_in_acl' and 'ls_out_acl' to drop packets which
> match on 'ct.inv'.
>
> This patch provides an option to not use this field in the
> logical flow matches for the following
> reasons:
>
>  • Some of the smart NICs which support offloading datapath
>    flows may not support this field.  In such cases, almost
>    all the datapath flows cannot be offloaded if ACLs/load balancers
>    are configured on the logical switch datapath.
>
>  • A recent commit in kernel ovs datapath sets the committed
>    connection tracking entry to be liberal for out-of-window
>    tcp packets (nf_ct_set_tcp_be_liberal()).  Such TCP
>    packets will not be marked as invalid.
>
> There are still certain scenarios where a packet can be marked
> invalid.  Like
>  • If the tcp flags are not correct.
>
>  • If there are checksum errors.
>
> In such cases, the packet will be delivered to the destination
> port.  So CMS should evaluate their usecases before enabling
> this option.
>
> Signed-off-by: Numan Siddique <numans@ovn.org>
> ---
>  NEWS                 |  5 +++
>  northd/lswitch.dl    | 13 +++++++-
>  northd/ovn-northd.c  | 41 ++++++++++++++---------
>  northd/ovn_northd.dl | 51 +++++++++++++++++++++++------
>  ovn-nb.xml           | 11 +++++++
>  tests/ovn-northd.at  | 77 ++++++++++++++++++++++++++++++++++++++++++++
>  6 files changed, 171 insertions(+), 27 deletions(-)
>
> diff --git a/NEWS b/NEWS
> index 530c5d42fe..3181649ba8 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -7,6 +7,11 @@ Post-v21.03.0
>      (This may take testing and tuning to be effective.)  This version of OVN
>      requires DDLog 0.36.
>    - Introduce ovn-controller incremetal processing engine statistics
> +  - Add a new NB Global option - us_ct_inv_match with the default value of true.
> +    If this is set to false, then the logical field - "ct.inv" will not be
> +    used in the logical flow matches.  CMS can consider setting this to false,
> +    if they want to use smart NICs which don't support offloading datapath flows
> +    with this field used.
>
>  OVN v21.03.0 - 12 Mar 2021
>  -------------------------
> diff --git a/northd/lswitch.dl b/northd/lswitch.dl
> index 4bf8a5b907..1daf249382 100644
> --- a/northd/lswitch.dl
> +++ b/northd/lswitch.dl
> @@ -197,6 +197,7 @@ relation &Switch(
>      ipv6_prefix:       Option<in6_addr>,
>      mcast_cfg:         Ref<McastSwitchCfg>,
>      is_vlan_transparent: bool,
> +    use_ct_inv_match:  bool,
>
>      /* Does this switch have at least one port with type != "router"? */
>      has_non_router_port: bool
> @@ -223,7 +224,8 @@ function ipv6_parse_prefix(s: string): Option<in6_addr> {
>          .ipv6_prefix       = ipv6_prefix,
>          .mcast_cfg         = mcast_cfg,
>          .has_non_router_port = has_non_router_port,
> -        .is_vlan_transparent = is_vlan_transparent) :-
> +        .is_vlan_transparent = is_vlan_transparent,
> +        .use_ct_inv_match = use_ct_inv_match) :-
>      nb::Logical_Switch[ls],
>      LogicalSwitchHasStatefulACL(ls._uuid, has_stateful_acl),
>      LogicalSwitchHasLBVIP(ls._uuid, has_lb_vip),
> @@ -232,6 +234,7 @@ function ipv6_parse_prefix(s: string): Option<in6_addr> {
>      LogicalSwitchLocalnetPorts(ls._uuid, localnet_ports),
>      LogicalSwitchHasNonRouterPort(ls._uuid, has_non_router_port),
>      mcast_cfg in &McastSwitchCfg(.datapath = ls._uuid),
> +    UseCtInvMatch(use_ct_inv_match),
>      var subnet =
>          match (ls.other_config.get("subnet")) {
>              None -> None,
> @@ -744,3 +747,11 @@ function put_svc_monitor_mac(options: Map<string,string>,
>  relation SvcMonitorMac(mac: eth_addr)
>  SvcMonitorMac(get_svc_monitor_mac(options, uuid)) :-
>      nb::NB_Global(._uuid = uuid, .options = options).
> +
> +function can_use_ct_inv_match(options: Map<string,string>): bool {
> +    map_get_bool_def(options, "use_ct_inv_match", true)
> +}
> +
> +relation UseCtInvMatch(use_ct_inv_match: bool)
> +UseCtInvMatch(can_use_ct_inv_match(options)) :-
> +    nb::NB_Global(._uuid = uuid, .options = options).
> diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
> index 3221fb9ff7..6baed2a418 100644
> --- a/northd/ovn-northd.c
> +++ b/northd/ovn-northd.c
> @@ -4066,6 +4066,10 @@ ovn_lflow_init(struct ovn_lflow *lflow, struct ovn_datapath *od,
>   * logical datapath only by creating a datapath group. */
>  static bool use_logical_dp_groups = false;
>
> +/* If this option is 'true' northd will make use of ct.inv match fields.
> + * Otherwise, it will avoid using it.  The default is true. */
> +static bool use_ct_inv_match = true;
> +
>  /* Adds a row with the specified contents to the Logical_Flow table. */
>  static void
>  ovn_lflow_add_at(struct hmap *lflow_map, struct ovn_datapath *od,
> @@ -5681,12 +5685,13 @@ build_acls(struct ovn_datapath *od, struct hmap *lflows,
>           * for deletion (bit 0 of ct_label is set).
>           *
>           * This is enforced at a higher priority than ACLs can be defined. */
> +        char *match = xasprintf("%s(ct.est && ct.rpl && ct_label.blocked == 1)",
> +                                use_ct_inv_match ? "ct.inv || " : "");
>          ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
> -                      "ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)",
> -                      "drop;");
> +                      match, "drop;");
>          ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
> -                      "ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)",
> -                      "drop;");
> +                      match, "drop;");
> +        free(match);
>
>          /* Ingress and Egress ACL Table (Priority 65535).
>           *
> @@ -5697,14 +5702,15 @@ build_acls(struct ovn_datapath *od, struct hmap *lflows,
>           * direction to hit the currently defined policy from ACLs.
>           *
>           * This is enforced at a higher priority than ACLs can be defined. */
> +        match = xasprintf("ct.est && !ct.rel && !ct.new%s && "
> +                          "ct.rpl && ct_label.blocked == 0",
> +                          use_ct_inv_match ? " && !ct.inv" : "");
> +
>          ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
> -                      "ct.est && !ct.rel && !ct.new && !ct.inv "
> -                      "&& ct.rpl && ct_label.blocked == 0",
> -                      "next;");
> +                      match, "next;");
>          ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
> -                      "ct.est && !ct.rel && !ct.new && !ct.inv "
> -                      "&& ct.rpl && ct_label.blocked == 0",
> -                      "next;");
> +                      match, "next;");
> +        free(match);
>
>          /* Ingress and Egress ACL Table (Priority 65535).
>           *
> @@ -5717,14 +5723,14 @@ build_acls(struct ovn_datapath *od, struct hmap *lflows,
>           * a dynamically negotiated FTP data channel), but will allow
>           * related traffic such as an ICMP Port Unreachable through
>           * that's generated from a non-listening UDP port.  */
> +        match = xasprintf("!ct.est && ct.rel && !ct.new%s && "
> +                          "ct_label.blocked == 0",
> +                          use_ct_inv_match ? " && !ct.inv" : "");
>          ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
> -                      "!ct.est && ct.rel && !ct.new && !ct.inv "
> -                      "&& ct_label.blocked == 0",
> -                      "next;");
> +                      match, "next;");
>          ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
> -                      "!ct.est && ct.rel && !ct.new && !ct.inv "
> -                      "&& ct_label.blocked == 0",
> -                      "next;");
> +                      match, "next;");
> +        free(match);
>
>          /* Ingress and Egress ACL Table (Priority 65535).
>           *
> @@ -12897,6 +12903,9 @@ ovnnb_db_run(struct northd_context *ctx,
>
>      use_logical_dp_groups = smap_get_bool(&nb->options,
>                                            "use_logical_dp_groups", false);
> +    use_ct_inv_match = smap_get_bool(&nb->options,
> +                                     "use_ct_inv_match", true);
> +
>      /* deprecated, use --event instead */
>      controller_event_en = smap_get_bool(&nb->options,
>                                          "controller_event", false);
> diff --git a/northd/ovn_northd.dl b/northd/ovn_northd.dl
> index 35e6ab76cb..9e1919cd80 100644
> --- a/northd/ovn_northd.dl
> +++ b/northd/ovn_northd.dl
> @@ -2396,16 +2396,25 @@ var has_stateful = sw.has_stateful_acl or sw.has_lb_vip in
>           * for deletion (bit 0 of ct_label is set).
>           *
>           * This is enforced at a higher priority than ACLs can be defined. */
> +        var __match = match (sw.use_ct_inv_match) {
> +             true -> "ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)",
> +             false -> "(ct.est && ct.rpl && ct_label.blocked == 1)"
> +        } in
>          Flow(.logical_datapath = ls._uuid,
>               .stage            = switch_stage(IN, ACL),
>               .priority         = 65535,
> -             .__match          = "ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)",
> +             .__match          = __match,
>               .actions          = "drop;",
>               .external_ids     = map_empty());
> +
> +        var __match = match (sw.use_ct_inv_match) {
> +             true -> "ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)",
> +             false -> "(ct.est && ct.rpl && ct_label.blocked == 1)"
> +        } in
>          Flow(.logical_datapath = ls._uuid,
>               .stage            = switch_stage(OUT, ACL),
>               .priority         = 65535,
> -             .__match          = "ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)",
> +             .__match          = __match,
>               .actions          = "drop;",
>               .external_ids     = map_empty());
>
> @@ -2418,18 +2427,29 @@ var has_stateful = sw.has_stateful_acl or sw.has_lb_vip in
>           * direction to hit the currently defined policy from ACLs.
>           *
>           * This is enforced at a higher priority than ACLs can be defined. */
> +        var __match = match (sw.use_ct_inv_match) {
> +             true -> "ct.est && !ct.rel && !ct.new && !ct.inv "
> +                     "&& ct.rpl && ct_label.blocked == 0",
> +             false -> "ct.est && !ct.rel && !ct.new "
> +                      "&& ct.rpl && ct_label.blocked == 0"
> +        } in
>          Flow(.logical_datapath = ls._uuid,
>               .stage            = switch_stage(IN, ACL),
>               .priority         = 65535,
> -             .__match          = "ct.est && !ct.rel && !ct.new && !ct.inv "
> -                                 "&& ct.rpl && ct_label.blocked == 0",
> +             .__match          = __match,
>               .actions          = "next;",
>               .external_ids     = map_empty());
> +
> +        var __match = match (sw.use_ct_inv_match) {
> +             true -> "ct.est && !ct.rel && !ct.new && !ct.inv "
> +                     "&& ct.rpl && ct_label.blocked == 0",
> +             false -> "ct.est && !ct.rel && !ct.new "
> +                      "&& ct.rpl && ct_label.blocked == 0"
> +        } in
>          Flow(.logical_datapath = ls._uuid,
>               .stage            = switch_stage(OUT, ACL),
>               .priority         = 65535,
> -             .__match          = "ct.est && !ct.rel && !ct.new && !ct.inv "
> -                                 "&& ct.rpl && ct_label.blocked == 0",
> +             .__match          = __match,
>               .actions          = "next;",
>               .external_ids     = map_empty());
>
> @@ -2444,18 +2464,29 @@ var has_stateful = sw.has_stateful_acl or sw.has_lb_vip in
>           * a dynamically negotiated FTP data channel), but will allow
>           * related traffic such as an ICMP Port Unreachable through
>           * that's generated from a non-listening UDP port.  */
> +        var __match = match (sw.use_ct_inv_match) {
> +             true -> "!ct.est && ct.rel && !ct.new && !ct.inv "
> +                     "&& ct_label.blocked == 0",
> +             false -> "!ct.est && ct.rel && !ct.new "
> +                      "&& ct_label.blocked == 0",
> +        } in
>          Flow(.logical_datapath = ls._uuid,
>               .stage            = switch_stage(IN, ACL),
>               .priority         = 65535,
> -             .__match          = "!ct.est && ct.rel && !ct.new && !ct.inv "
> -                                 "&& ct_label.blocked == 0",
> +             .__match          = __match,
>               .actions          = "next;",
>               .external_ids     = map_empty());
> +
> +        var __match = match (sw.use_ct_inv_match) {
> +             true -> "!ct.est && ct.rel && !ct.new && !ct.inv "
> +                     "&& ct_label.blocked == 0",
> +             false -> "!ct.est && ct.rel && !ct.new "
> +                      "&& ct_label.blocked == 0",
> +        } in
>          Flow(.logical_datapath = ls._uuid,
>               .stage            = switch_stage(OUT, ACL),
>               .priority         = 65535,
> -             .__match          = "!ct.est && ct.rel && !ct.new && !ct.inv "
> -                                 "&& ct_label.blocked == 0",
> +             .__match          = __match,
>               .actions          = "next;",
>               .external_ids     = map_empty());
>
> diff --git a/ovn-nb.xml b/ovn-nb.xml
> index b0a4adffe5..c3ff3b586a 100644
> --- a/ovn-nb.xml
> +++ b/ovn-nb.xml
> @@ -227,6 +227,17 @@
>          </p>
>        </column>
>
> +      <column name="options" key="use_ct_inv_match">
> +        <p>
> +          If set to false, <code>ovn-northd</code> will not use the
> +          <code>ct.inv</code> field in any of the logical flow matches.
> +          The default value is true.  CMS can consider setting this to
> +          false, in case the NIC doesn't support offloading OVS datapath
> +          flows having matches <code>ct_state(..+inv..)</code> or
> +          <code>ct_state(..-inv..)</code>.
> +        </p>
> +      </column>
> +
>        <group title="Options for configuring interconnection route advertisement">
>          <p>
>            These options control how routes are advertised between OVN
> diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
> index 32f956a797..3a3a443a4e 100644
> --- a/tests/ovn-northd.at
> +++ b/tests/ovn-northd.at
> @@ -3066,3 +3066,80 @@ AT_CHECK([grep "ls_out_stateful" sw0flows | sort], [0], [dnl
>
>  AT_CLEANUP
>  ])
> +
> +OVN_FOR_EACH_NORTHD([
> +AT_SETUP([ovn -- ct.inv usage])
> +ovn_start
> +
> +check ovn-nbctl ls-add sw0
> +check ovn-nbctl lsp-add sw0 sw0p1
> +
> +check ovn-nbctl --wait=sb acl-add sw0 to-lport 1002 ip allow-related
> +
> +ovn-sbctl dump-flows sw0 > sw0flows
> +AT_CAPTURE_FILE([sw0flows])
> +
> +AT_CHECK([grep -w "ls_in_acl" sw0flows | grep 65535 | sort], [0], [dnl
> +  table=9 (ls_in_acl          ), priority=65535, match=(!ct.est && ct.rel && !ct.new && !ct.inv && ct_label.blocked == 0), action=(next;)
> +  table=9 (ls_in_acl          ), priority=65535, match=(ct.est && !ct.rel && !ct.new && !ct.inv && ct.rpl && ct_label.blocked == 0), action=(next;)
> +  table=9 (ls_in_acl          ), priority=65535, match=(ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)), action=(drop;)
> +  table=9 (ls_in_acl          ), priority=65535, match=(nd || nd_ra || nd_rs || mldv1 || mldv2), action=(next;)
> +])
> +
> +AT_CHECK([grep -w "ls_out_acl" sw0flows | grep 65535 | sort], [0], [dnl
> +  table=4 (ls_out_acl         ), priority=65535, match=(!ct.est && ct.rel && !ct.new && !ct.inv && ct_label.blocked == 0), action=(next;)
> +  table=4 (ls_out_acl         ), priority=65535, match=(ct.est && !ct.rel && !ct.new && !ct.inv && ct.rpl && ct_label.blocked == 0), action=(next;)
> +  table=4 (ls_out_acl         ), priority=65535, match=(ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)), action=(drop;)
> +  table=4 (ls_out_acl         ), priority=65535, match=(nd || nd_ra || nd_rs || mldv1 || mldv2), action=(next;)
> +])
> +
> +# Disable ct.inv usage.
> +check ovn-nbctl --wait=sb set NB_Global . options:use_ct_inv_match=false
> +
> +ovn-sbctl dump-flows sw0 > sw0flows
> +AT_CAPTURE_FILE([sw0flows])
> +
> +AT_CHECK([grep -w "ls_in_acl" sw0flows | grep 65535 | sort], [0], [dnl
> +  table=9 (ls_in_acl          ), priority=65535, match=(!ct.est && ct.rel && !ct.new && ct_label.blocked == 0), action=(next;)
> +  table=9 (ls_in_acl          ), priority=65535, match=((ct.est && ct.rpl && ct_label.blocked == 1)), action=(drop;)
> +  table=9 (ls_in_acl          ), priority=65535, match=(ct.est && !ct.rel && !ct.new && ct.rpl && ct_label.blocked == 0), action=(next;)
> +  table=9 (ls_in_acl          ), priority=65535, match=(nd || nd_ra || nd_rs || mldv1 || mldv2), action=(next;)
> +])
> +
> +AT_CHECK([grep -w "ls_out_acl" sw0flows | grep 65535 | sort], [0], [dnl
> +  table=4 (ls_out_acl         ), priority=65535, match=(!ct.est && ct.rel && !ct.new && ct_label.blocked == 0), action=(next;)
> +  table=4 (ls_out_acl         ), priority=65535, match=((ct.est && ct.rpl && ct_label.blocked == 1)), action=(drop;)
> +  table=4 (ls_out_acl         ), priority=65535, match=(ct.est && !ct.rel && !ct.new && ct.rpl && ct_label.blocked == 0), action=(next;)
> +  table=4 (ls_out_acl         ), priority=65535, match=(nd || nd_ra || nd_rs || mldv1 || mldv2), action=(next;)
> +])
> +
> +AT_CHECK([grep -c "ct.inv" sw0flows], [1], [dnl
> +0
> +])
> +
> +# Enable ct.inv usage.
> +check ovn-nbctl --wait=sb set NB_Global . options:use_ct_inv_match=true
> +
> +ovn-sbctl dump-flows sw0 > sw0flows
> +AT_CAPTURE_FILE([sw0flows])
> +
> +AT_CHECK([grep -w "ls_in_acl" sw0flows | grep 65535 | sort], [0], [dnl
> +  table=9 (ls_in_acl          ), priority=65535, match=(!ct.est && ct.rel && !ct.new && !ct.inv && ct_label.blocked == 0), action=(next;)
> +  table=9 (ls_in_acl          ), priority=65535, match=(ct.est && !ct.rel && !ct.new && !ct.inv && ct.rpl && ct_label.blocked == 0), action=(next;)
> +  table=9 (ls_in_acl          ), priority=65535, match=(ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)), action=(drop;)
> +  table=9 (ls_in_acl          ), priority=65535, match=(nd || nd_ra || nd_rs || mldv1 || mldv2), action=(next;)
> +])
> +
> +AT_CHECK([grep -w "ls_out_acl" sw0flows | grep 65535 | sort], [0], [dnl
> +  table=4 (ls_out_acl         ), priority=65535, match=(!ct.est && ct.rel && !ct.new && !ct.inv && ct_label.blocked == 0), action=(next;)
> +  table=4 (ls_out_acl         ), priority=65535, match=(ct.est && !ct.rel && !ct.new && !ct.inv && ct.rpl && ct_label.blocked == 0), action=(next;)
> +  table=4 (ls_out_acl         ), priority=65535, match=(ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)), action=(drop;)
> +  table=4 (ls_out_acl         ), priority=65535, match=(nd || nd_ra || nd_rs || mldv1 || mldv2), action=(next;)
> +])
> +
> +AT_CHECK([grep -c "ct.inv" sw0flows], [0], [dnl
> +6
> +])
> +
> +AT_CLEANUP
> +])
> --
> 2.29.2
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Ben Pfaff March 26, 2021, 3:46 a.m. UTC | #2
On Mon, Mar 22, 2021 at 04:29:11PM +0530, numans@ovn.org wrote:
> From: Numan Siddique <numans@ovn.org>
> 
> Presently we add 65535 priority lflows in the stages -
> 'ls_in_acl' and 'ls_out_acl' to drop packets which
> match on 'ct.inv'.
> 
> This patch provides an option to not use this field in the
> logical flow matches for the following
> reasons:
> 
>  • Some of the smart NICs which support offloading datapath
>    flows may not support this field.  In such cases, almost
>    all the datapath flows cannot be offloaded if ACLs/load balancers
>    are configured on the logical switch datapath.
> 
>  • A recent commit in kernel ovs datapath sets the committed
>    connection tracking entry to be liberal for out-of-window
>    tcp packets (nf_ct_set_tcp_be_liberal()).  Such TCP
>    packets will not be marked as invalid.
> 
> There are still certain scenarios where a packet can be marked
> invalid.  Like
>  • If the tcp flags are not correct.
> 
>  • If there are checksum errors.
> 
> In such cases, the packet will be delivered to the destination
> port.  So CMS should evaluate their usecases before enabling
> this option.
> 
> Signed-off-by: Numan Siddique <numans@ovn.org>

Thanks for writing dldog code!  I have a few comments.

This looks correct:

    function can_use_ct_inv_match(options: Map<string,string>): bool {
        map_get_bool_def(options, "use_ct_inv_match", true)
    }

    relation UseCtInvMatch(use_ct_inv_match: bool)
    UseCtInvMatch(can_use_ct_inv_match(options)) :-
        nb::NB_Global(._uuid = uuid, .options = options).

I would make a few stylistic changes if I were writing it.  I would drop
the _uuid match because it is not used for anything.  I would use a
relation that just contains a bool, instead of a struct that contains a
bool, because then there are fewer names to invent.  I would usually
drop the function, since it is only used in one place and it is a
one-liner.  So I'd end up with this:

    relation UseCtInvMatch[bool]
    UseCtInvMatch[use_ct_inv_match] :-
        nb::NB_Global(.options = options),
        var use_ct_inv_match = map_get_bool_def(options, "use_ct_inv_match", true).

Or possibly I'd keep the map_get_bool_def(...) expression inside the
brackets, I haven't decided what's really the best way:

    relation UseCtInvMatch[bool]
    UseCtInvMatch[map_get_bool_def(options, "use_ct_inv_match", true)] :-
        nb::NB_Global(.options = options).

I'd also add a rule to handle the case where NB_Global doesn't exist.
This is really out of paranoia.  I don't know whether this can really
happen in practice, but it costs only a few lines and avoids a problem
if it does ever somehow happen:

    UseCtInvMatch[true] :-
        Unit(),
        not nb in nb::NB_Global().

I noticed is that this global feature flag is getting copied inside
every Switch.  That wastes a little memory and I do not know of a
benefit.  So I'd drop it from Switch and join with UseCtInvMatch in the
one place where it's needed.

Finally, there's are lots of redundant strings in ovn_northd.dl.  We can
make that better with a little parameterization, something like this:
    @@ -2290,10 +2290,15 @@ for (Reject(lsuuid, pipeline, stage, acl, fair_meter, extra_match_, extra_action
              .actions          = actions,
              .external_ids     = stage_hint(acl._uuid))
     }

     /* build_acls */
    +for (UseCtInvMatch[use_ct_inv_match]) {
    +    (var ct_inv_or, var and_not_ct_inv) = match (use_ct_inv_match) {
    +        true -> ("ct.inv || ", "&& !ct.inv "),
    +        false -> ("", ""),
    +    } in
         for (sw in &Switch(.ls = ls))
         var has_stateful = sw.has_stateful_acl or sw.has_lb_vip in
         {
             /* Ingress and Egress ACL Table (Priority 0): Packets are allowed by
              * default.  A related rule at priority 1 is added below if there
    @@ -2354,17 +2359,17 @@ var has_stateful = sw.has_stateful_acl or sw.has_lb_vip in
                  *
                  * This is enforced at a higher priority than ACLs can be defined. */
                 Flow(.logical_datapath = ls._uuid,
                      .stage            = switch_stage(IN, ACL),
                      .priority         = 65535,
    -             .__match          = "ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)",
    +                 .__match          = ct_inv_or ++ "(ct.est && ct.rpl && ct_label.blocked == 1)",
                      .actions          = "drop;",
                      .external_ids     = map_empty());
                 Flow(.logical_datapath = ls._uuid,
                      .stage            = switch_stage(OUT, ACL),
                      .priority         = 65535,
    -             .__match          = "ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)",
    +                 .__match          = ct_inv_or ++ "(ct.est && ct.rpl && ct_label.blocked == 1)",
                      .actions          = "drop;",
                      .external_ids     = map_empty());

                 /* Ingress and Egress ACL Table (Priority 65535).
                  *

When I put all that together, I get the following replacement overall
patch.  I haven't tested it, so maybe I made some mistakes, but it did
compile.  The change to ovn_northd.dl looks big but most of it is
reindentation; if you do a "git show -b" after applying it you'll see
the real changes.

Thanks,

Ben.

-8<--------------------------cut here-------------------------->8--

From: Numan Siddique <numans@ovn.org>
Date: Mon, 22 Mar 2021 16:29:11 +0530
Subject: [PATCH ovn] northd: Provide the option to not use ct.inv in lflows.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Presently we add 65535 priority lflows in the stages -
'ls_in_acl' and 'ls_out_acl' to drop packets which
match on 'ct.inv'.

This patch provides an option to not use this field in the
logical flow matches for the following
reasons:

 • Some of the smart NICs which support offloading datapath
   flows may not support this field.  In such cases, almost
   all the datapath flows cannot be offloaded if ACLs/load balancers
   are configured on the logical switch datapath.

 • A recent commit in kernel ovs datapath sets the committed
   connection tracking entry to be liberal for out-of-window
   tcp packets (nf_ct_set_tcp_be_liberal()).  Such TCP
   packets will not be marked as invalid.

There are still certain scenarios where a packet can be marked
invalid.  Like
 • If the tcp flags are not correct.

 • If there are checksum errors.

In such cases, the packet will be delivered to the destination
port.  So CMS should evaluate their usecases before enabling
this option.

Signed-off-by: Numan Siddique <numans@ovn.org>
Signed-off-by: Ben Pfaff <blp@ovn.org>
---
 NEWS                 |   5 +
 northd/lswitch.dl    |   8 ++
 northd/ovn-northd.c  |  41 +++---
 northd/ovn_northd.dl | 304 ++++++++++++++++++++++---------------------
 ovn-nb.xml           |  11 ++
 5 files changed, 204 insertions(+), 165 deletions(-)

diff --git a/NEWS b/NEWS
index 530c5d42fe85..3181649ba822 100644
--- a/NEWS
+++ b/NEWS
@@ -7,6 +7,11 @@ Post-v21.03.0
     (This may take testing and tuning to be effective.)  This version of OVN
     requires DDLog 0.36.
   - Introduce ovn-controller incremetal processing engine statistics
+  - Add a new NB Global option - us_ct_inv_match with the default value of true.
+    If this is set to false, then the logical field - "ct.inv" will not be
+    used in the logical flow matches.  CMS can consider setting this to false,
+    if they want to use smart NICs which don't support offloading datapath flows
+    with this field used.
 
 OVN v21.03.0 - 12 Mar 2021
 -------------------------
diff --git a/northd/lswitch.dl b/northd/lswitch.dl
index 4bf8a5b907a9..db33bad52ffe 100644
--- a/northd/lswitch.dl
+++ b/northd/lswitch.dl
@@ -232,6 +232,7 @@ function ipv6_parse_prefix(s: string): Option<in6_addr> {
     LogicalSwitchLocalnetPorts(ls._uuid, localnet_ports),
     LogicalSwitchHasNonRouterPort(ls._uuid, has_non_router_port),
     mcast_cfg in &McastSwitchCfg(.datapath = ls._uuid),
+    UseCtInvMatch[use_ct_inv_match],
     var subnet =
         match (ls.other_config.get("subnet")) {
             None -> None,
@@ -744,3 +745,10 @@ function put_svc_monitor_mac(options: Map<string,string>,
 relation SvcMonitorMac(mac: eth_addr)
 SvcMonitorMac(get_svc_monitor_mac(options, uuid)) :-
     nb::NB_Global(._uuid = uuid, .options = options).
+
+relation UseCtInvMatch[bool]
+UseCtInvMatch[map_get_bool_def(options, "use_ct_inv_match", true)] :-
+    nb::NB_Global(.options = options).
+UseCtInvMatch[true] :-
+    Unit(),
+    not nb in nb::NB_Global().
diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index 57df62b9207a..f1332370eb54 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -4068,6 +4068,10 @@ ovn_lflow_init(struct ovn_lflow *lflow, struct ovn_datapath *od,
  * logical datapath only by creating a datapath group. */
 static bool use_logical_dp_groups = false;
 
+/* If this option is 'true' northd will make use of ct.inv match fields.
+ * Otherwise, it will avoid using it.  The default is true. */
+static bool use_ct_inv_match = true;
+
 /* Adds a row with the specified contents to the Logical_Flow table. */
 static void
 ovn_lflow_add_at(struct hmap *lflow_map, struct ovn_datapath *od,
@@ -5647,12 +5651,13 @@ build_acls(struct ovn_datapath *od, struct hmap *lflows,
          * for deletion (bit 0 of ct_label is set).
          *
          * This is enforced at a higher priority than ACLs can be defined. */
+        char *match = xasprintf("%s(ct.est && ct.rpl && ct_label.blocked == 1)",
+                                use_ct_inv_match ? "ct.inv || " : "");
         ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
-                      "ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)",
-                      "drop;");
+                      match, "drop;");
         ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
-                      "ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)",
-                      "drop;");
+                      match, "drop;");
+        free(match);
 
         /* Ingress and Egress ACL Table (Priority 65535).
          *
@@ -5663,14 +5668,15 @@ build_acls(struct ovn_datapath *od, struct hmap *lflows,
          * direction to hit the currently defined policy from ACLs.
          *
          * This is enforced at a higher priority than ACLs can be defined. */
+        match = xasprintf("ct.est && !ct.rel && !ct.new%s && "
+                          "ct.rpl && ct_label.blocked == 0",
+                          use_ct_inv_match ? " && !ct.inv" : "");
+
         ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
-                      "ct.est && !ct.rel && !ct.new && !ct.inv "
-                      "&& ct.rpl && ct_label.blocked == 0",
-                      "next;");
+                      match, "next;");
         ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
-                      "ct.est && !ct.rel && !ct.new && !ct.inv "
-                      "&& ct.rpl && ct_label.blocked == 0",
-                      "next;");
+                      match, "next;");
+        free(match);
 
         /* Ingress and Egress ACL Table (Priority 65535).
          *
@@ -5683,14 +5689,14 @@ build_acls(struct ovn_datapath *od, struct hmap *lflows,
          * a dynamically negotiated FTP data channel), but will allow
          * related traffic such as an ICMP Port Unreachable through
          * that's generated from a non-listening UDP port.  */
+        match = xasprintf("!ct.est && ct.rel && !ct.new%s && "
+                          "ct_label.blocked == 0",
+                          use_ct_inv_match ? " && !ct.inv" : "");
         ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
-                      "!ct.est && ct.rel && !ct.new && !ct.inv "
-                      "&& ct_label.blocked == 0",
-                      "next;");
+                      match, "next;");
         ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
-                      "!ct.est && ct.rel && !ct.new && !ct.inv "
-                      "&& ct_label.blocked == 0",
-                      "next;");
+                      match, "next;");
+        free(match);
 
         /* Ingress and Egress ACL Table (Priority 65535).
          *
@@ -12974,6 +12980,9 @@ ovnnb_db_run(struct northd_context *ctx,
 
     use_logical_dp_groups = smap_get_bool(&nb->options,
                                           "use_logical_dp_groups", false);
+    use_ct_inv_match = smap_get_bool(&nb->options,
+                                     "use_ct_inv_match", true);
+
     /* deprecated, use --event instead */
     controller_event_en = smap_get_bool(&nb->options,
                                         "controller_event", false);
diff --git a/northd/ovn_northd.dl b/northd/ovn_northd.dl
index 4262b83b9760..4f966e50fb7b 100644
--- a/northd/ovn_northd.dl
+++ b/northd/ovn_northd.dl
@@ -2292,173 +2292,179 @@ for (Reject(lsuuid, pipeline, stage, acl, fair_meter, extra_match_, extra_action
 }
 
 /* build_acls */
-for (sw in &Switch(.ls = ls))
-var has_stateful = sw.has_stateful_acl or sw.has_lb_vip in
-{
-    /* Ingress and Egress ACL Table (Priority 0): Packets are allowed by
-     * default.  A related rule at priority 1 is added below if there
-     * are any stateful ACLs in this datapath. */
-    Flow(.logical_datapath = ls._uuid,
-         .stage            = switch_stage(IN, ACL),
-         .priority         = 0,
-         .__match          = "1",
-         .actions          = "next;",
-         .external_ids     = map_empty());
-    Flow(.logical_datapath = ls._uuid,
-         .stage            = switch_stage(OUT, ACL),
-         .priority         = 0,
-         .__match          = "1",
-         .actions          = "next;",
-         .external_ids     = map_empty());
-
-    if (has_stateful) {
-        /* Ingress and Egress ACL Table (Priority 1).
-         *
-         * By default, traffic is allowed.  This is partially handled by
-         * the Priority 0 ACL flows added earlier, but we also need to
-         * commit IP flows.  This is because, while the initiater's
-         * direction may not have any stateful rules, the server's may
-         * and then its return traffic would not have an associated
-         * conntrack entry and would return "+invalid".
-         *
-         * We use "ct_commit" for a connection that is not already known
-         * by the connection tracker.  Once a connection is committed,
-         * subsequent packets will hit the flow at priority 0 that just
-         * uses "next;"
-         *
-         * We also check for established connections that have ct_label.blocked
-         * set on them.  That's a connection that was disallowed, but is
-         * now allowed by policy again since it hit this default-allow flow.
-         * We need to set ct_label.blocked=0 to let the connection continue,
-         * which will be done by ct_commit() in the "stateful" stage.
-         * Subsequent packets will hit the flow at priority 0 that just
-         * uses "next;". */
-        Flow(.logical_datapath = ls._uuid,
-             .stage            = switch_stage(IN, ACL),
-             .priority         = 1,
-             .__match          = "ip && (!ct.est || (ct.est && ct_label.blocked == 1))",
-             .actions          = "${rEGBIT_CONNTRACK_COMMIT()} = 1; next;",
-             .external_ids     = map_empty());
-        Flow(.logical_datapath = ls._uuid,
-             .stage            = switch_stage(OUT, ACL),
-             .priority         = 1,
-             .__match          = "ip && (!ct.est || (ct.est && ct_label.blocked == 1))",
-             .actions          = "${rEGBIT_CONNTRACK_COMMIT()} = 1; next;",
-             .external_ids     = map_empty());
-
-        /* Ingress and Egress ACL Table (Priority 65535).
-         *
-         * Always drop traffic that's in an invalid state.  Also drop
-         * reply direction packets for connections that have been marked
-         * for deletion (bit 0 of ct_label is set).
-         *
-         * This is enforced at a higher priority than ACLs can be defined. */
-        Flow(.logical_datapath = ls._uuid,
-             .stage            = switch_stage(IN, ACL),
-             .priority         = 65535,
-             .__match          = "ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)",
-             .actions          = "drop;",
-             .external_ids     = map_empty());
-        Flow(.logical_datapath = ls._uuid,
-             .stage            = switch_stage(OUT, ACL),
-             .priority         = 65535,
-             .__match          = "ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)",
-             .actions          = "drop;",
-             .external_ids     = map_empty());
-
-        /* Ingress and Egress ACL Table (Priority 65535).
-         *
-         * Allow reply traffic that is part of an established
-         * conntrack entry that has not been marked for deletion
-         * (bit 0 of ct_label).  We only match traffic in the
-         * reply direction because we want traffic in the request
-         * direction to hit the currently defined policy from ACLs.
-         *
-         * This is enforced at a higher priority than ACLs can be defined. */
+for (UseCtInvMatch[use_ct_inv_match]) {
+    (var ct_inv_or, var and_not_ct_inv) = match (use_ct_inv_match) {
+        true -> ("ct.inv || ", "&& !ct.inv "),
+        false -> ("", ""),
+    } in
+    for (sw in &Switch(.ls = ls))
+    var has_stateful = sw.has_stateful_acl or sw.has_lb_vip in
+    {
+        /* Ingress and Egress ACL Table (Priority 0): Packets are allowed by
+         * default.  A related rule at priority 1 is added below if there
+         * are any stateful ACLs in this datapath. */
         Flow(.logical_datapath = ls._uuid,
              .stage            = switch_stage(IN, ACL),
-             .priority         = 65535,
-             .__match          = "ct.est && !ct.rel && !ct.new && !ct.inv "
-                                 "&& ct.rpl && ct_label.blocked == 0",
+             .priority         = 0,
+             .__match          = "1",
              .actions          = "next;",
              .external_ids     = map_empty());
         Flow(.logical_datapath = ls._uuid,
              .stage            = switch_stage(OUT, ACL),
-             .priority         = 65535,
-             .__match          = "ct.est && !ct.rel && !ct.new && !ct.inv "
-                                 "&& ct.rpl && ct_label.blocked == 0",
+             .priority         = 0,
+             .__match          = "1",
              .actions          = "next;",
              .external_ids     = map_empty());
 
-        /* Ingress and Egress ACL Table (Priority 65535).
-         *
-         * Allow traffic that is related to an existing conntrack entry that
-         * has not been marked for deletion (bit 0 of ct_label).
-         *
-         * This is enforced at a higher priority than ACLs can be defined.
-         *
-         * NOTE: This does not support related data sessions (eg,
-         * a dynamically negotiated FTP data channel), but will allow
-         * related traffic such as an ICMP Port Unreachable through
-         * that's generated from a non-listening UDP port.  */
-        Flow(.logical_datapath = ls._uuid,
-             .stage            = switch_stage(IN, ACL),
-             .priority         = 65535,
-             .__match          = "!ct.est && ct.rel && !ct.new && !ct.inv "
-                                 "&& ct_label.blocked == 0",
-             .actions          = "next;",
-             .external_ids     = map_empty());
-        Flow(.logical_datapath = ls._uuid,
-             .stage            = switch_stage(OUT, ACL),
-             .priority         = 65535,
-             .__match          = "!ct.est && ct.rel && !ct.new && !ct.inv "
-                                 "&& ct_label.blocked == 0",
-             .actions          = "next;",
-             .external_ids     = map_empty());
+        if (has_stateful) {
+            /* Ingress and Egress ACL Table (Priority 1).
+             *
+             * By default, traffic is allowed.  This is partially handled by
+             * the Priority 0 ACL flows added earlier, but we also need to
+             * commit IP flows.  This is because, while the initiater's
+             * direction may not have any stateful rules, the server's may
+             * and then its return traffic would not have an associated
+             * conntrack entry and would return "+invalid".
+             *
+             * We use "ct_commit" for a connection that is not already known
+             * by the connection tracker.  Once a connection is committed,
+             * subsequent packets will hit the flow at priority 0 that just
+             * uses "next;"
+             *
+             * We also check for established connections that have ct_label.blocked
+             * set on them.  That's a connection that was disallowed, but is
+             * now allowed by policy again since it hit this default-allow flow.
+             * We need to set ct_label.blocked=0 to let the connection continue,
+             * which will be done by ct_commit() in the "stateful" stage.
+             * Subsequent packets will hit the flow at priority 0 that just
+             * uses "next;". */
+            Flow(.logical_datapath = ls._uuid,
+                 .stage            = switch_stage(IN, ACL),
+                 .priority         = 1,
+                 .__match          = "ip && (!ct.est || (ct.est && ct_label.blocked == 1))",
+                 .actions          = "${rEGBIT_CONNTRACK_COMMIT()} = 1; next;",
+                 .external_ids     = map_empty());
+            Flow(.logical_datapath = ls._uuid,
+                 .stage            = switch_stage(OUT, ACL),
+                 .priority         = 1,
+                 .__match          = "ip && (!ct.est || (ct.est && ct_label.blocked == 1))",
+                 .actions          = "${rEGBIT_CONNTRACK_COMMIT()} = 1; next;",
+                 .external_ids     = map_empty());
 
-        /* Ingress and Egress ACL Table (Priority 65535).
-         *
-         * Not to do conntrack on ND packets. */
+            /* Ingress and Egress ACL Table (Priority 65535).
+             *
+             * Always drop traffic that's in an invalid state.  Also drop
+             * reply direction packets for connections that have been marked
+             * for deletion (bit 0 of ct_label is set).
+             *
+             * This is enforced at a higher priority than ACLs can be defined. */
+            Flow(.logical_datapath = ls._uuid,
+                 .stage            = switch_stage(IN, ACL),
+                 .priority         = 65535,
+                 .__match          = ct_inv_or ++ "(ct.est && ct.rpl && ct_label.blocked == 1)",
+                 .actions          = "drop;",
+                 .external_ids     = map_empty());
+            Flow(.logical_datapath = ls._uuid,
+                 .stage            = switch_stage(OUT, ACL),
+                 .priority         = 65535,
+                 .__match          = ct_inv_or ++ "(ct.est && ct.rpl && ct_label.blocked == 1)",
+                 .actions          = "drop;",
+                 .external_ids     = map_empty());
+
+            /* Ingress and Egress ACL Table (Priority 65535).
+             *
+             * Allow reply traffic that is part of an established
+             * conntrack entry that has not been marked for deletion
+             * (bit 0 of ct_label).  We only match traffic in the
+             * reply direction because we want traffic in the request
+             * direction to hit the currently defined policy from ACLs.
+             *
+             * This is enforced at a higher priority than ACLs can be defined. */
+            Flow(.logical_datapath = ls._uuid,
+                 .stage            = switch_stage(IN, ACL),
+                 .priority         = 65535,
+                 .__match          = "ct.est && !ct.rel && !ct.new " ++ and_not_ct_inv ++
+                                     "&& ct.rpl && ct_label.blocked == 0",
+                 .actions          = "next;",
+                 .external_ids     = map_empty());
+            Flow(.logical_datapath = ls._uuid,
+                 .stage            = switch_stage(OUT, ACL),
+                 .priority         = 65535,
+                 .__match          = "ct.est && !ct.rel && !ct.new " ++ and_not_ct_inv ++
+                                     "&& ct.rpl && ct_label.blocked == 0",
+                 .actions          = "next;",
+                 .external_ids     = map_empty());
+
+            /* Ingress and Egress ACL Table (Priority 65535).
+             *
+             * Allow traffic that is related to an existing conntrack entry that
+             * has not been marked for deletion (bit 0 of ct_label).
+             *
+             * This is enforced at a higher priority than ACLs can be defined.
+             *
+             * NOTE: This does not support related data sessions (eg,
+             * a dynamically negotiated FTP data channel), but will allow
+             * related traffic such as an ICMP Port Unreachable through
+             * that's generated from a non-listening UDP port.  */
+            Flow(.logical_datapath = ls._uuid,
+                 .stage            = switch_stage(IN, ACL),
+                 .priority         = 65535,
+                 .__match          = "!ct.est && ct.rel && !ct.new " ++ and_not_ct_inv ++
+                                     "&& ct_label.blocked == 0",
+                 .actions          = "next;",
+                 .external_ids     = map_empty());
+            Flow(.logical_datapath = ls._uuid,
+                 .stage            = switch_stage(OUT, ACL),
+                 .priority         = 65535,
+                 .__match          = "!ct.est && ct.rel && !ct.new " ++ and_not_ct_inv ++
+                                     "&& ct_label.blocked == 0",
+                 .actions          = "next;",
+                 .external_ids     = map_empty());
+
+            /* Ingress and Egress ACL Table (Priority 65535).
+             *
+             * Not to do conntrack on ND packets. */
+            Flow(.logical_datapath = ls._uuid,
+                 .stage            = switch_stage(IN, ACL),
+                 .priority         = 65535,
+                 .__match          = "nd || nd_ra || nd_rs || mldv1 || mldv2",
+                 .actions          = "next;",
+                 .external_ids     = map_empty());
+            Flow(.logical_datapath = ls._uuid,
+                 .stage            = switch_stage(OUT, ACL),
+                 .priority         = 65535,
+                 .__match          = "nd || nd_ra || nd_rs || mldv1 || mldv2",
+                 .actions          = "next;",
+                 .external_ids     = map_empty())
+        };
+
+        /* Add a 34000 priority flow to advance the DNS reply from ovn-controller,
+         * if the CMS has configured DNS records for the datapath.
+         */
+        if (sw.has_dns_records) {
+            Flow(.logical_datapath = ls._uuid,
+                 .stage            = switch_stage(OUT, ACL),
+                 .priority         = 34000,
+                 .__match          = "udp.src == 53",
+                 .actions          = if has_stateful "ct_commit; next;" else "next;",
+                 .external_ids     = map_empty())
+        };
+
+        /* Add a 34000 priority flow to advance the service monitor reply
+         * packets to skip applying ingress ACLs. */
         Flow(.logical_datapath = ls._uuid,
              .stage            = switch_stage(IN, ACL),
-             .priority         = 65535,
-             .__match          = "nd || nd_ra || nd_rs || mldv1 || mldv2",
+             .priority         = 34000,
+             .__match          = "eth.dst == $svc_monitor_mac",
              .actions          = "next;",
              .external_ids     = map_empty());
-        Flow(.logical_datapath = ls._uuid,
-             .stage            = switch_stage(OUT, ACL),
-             .priority         = 65535,
-             .__match          = "nd || nd_ra || nd_rs || mldv1 || mldv2",
-             .actions          = "next;",
-             .external_ids     = map_empty())
-    };
-
-    /* Add a 34000 priority flow to advance the DNS reply from ovn-controller,
-     * if the CMS has configured DNS records for the datapath.
-     */
-    if (sw.has_dns_records) {
         Flow(.logical_datapath = ls._uuid,
              .stage            = switch_stage(OUT, ACL),
              .priority         = 34000,
-             .__match          = "udp.src == 53",
-             .actions          = if has_stateful "ct_commit; next;" else "next;",
+             .__match          = "eth.src == $svc_monitor_mac",
+             .actions          = "next;",
              .external_ids     = map_empty())
-    };
-
-    /* Add a 34000 priority flow to advance the service monitor reply
-     * packets to skip applying ingress ACLs. */
-    Flow(.logical_datapath = ls._uuid,
-         .stage            = switch_stage(IN, ACL),
-         .priority         = 34000,
-         .__match          = "eth.dst == $svc_monitor_mac",
-         .actions          = "next;",
-         .external_ids     = map_empty());
-    Flow(.logical_datapath = ls._uuid,
-         .stage            = switch_stage(OUT, ACL),
-         .priority         = 34000,
-         .__match          = "eth.src == $svc_monitor_mac",
-         .actions          = "next;",
-         .external_ids     = map_empty())
+    }
 }
 
 /* This stage builds hints for the IN/OUT_ACL stage. Based on various
diff --git a/ovn-nb.xml b/ovn-nb.xml
index b0a4adffe537..c3ff3b586ad1 100644
--- a/ovn-nb.xml
+++ b/ovn-nb.xml
@@ -227,6 +227,17 @@
         </p>
       </column>
 
+      <column name="options" key="use_ct_inv_match">
+        <p>
+          If set to false, <code>ovn-northd</code> will not use the
+          <code>ct.inv</code> field in any of the logical flow matches.
+          The default value is true.  CMS can consider setting this to
+          false, in case the NIC doesn't support offloading OVS datapath
+          flows having matches <code>ct_state(..+inv..)</code> or
+          <code>ct_state(..-inv..)</code>.
+        </p>
+      </column>
+
       <group title="Options for configuring interconnection route advertisement">
         <p>
           These options control how routes are advertised between OVN
Numan Siddique March 26, 2021, 3:54 a.m. UTC | #3
On Fri, Mar 26, 2021 at 9:16 AM Ben Pfaff <blp@ovn.org> wrote:
>
> On Mon, Mar 22, 2021 at 04:29:11PM +0530, numans@ovn.org wrote:
> > From: Numan Siddique <numans@ovn.org>
> >
> > Presently we add 65535 priority lflows in the stages -
> > 'ls_in_acl' and 'ls_out_acl' to drop packets which
> > match on 'ct.inv'.
> >
> > This patch provides an option to not use this field in the
> > logical flow matches for the following
> > reasons:
> >
> >  • Some of the smart NICs which support offloading datapath
> >    flows may not support this field.  In such cases, almost
> >    all the datapath flows cannot be offloaded if ACLs/load balancers
> >    are configured on the logical switch datapath.
> >
> >  • A recent commit in kernel ovs datapath sets the committed
> >    connection tracking entry to be liberal for out-of-window
> >    tcp packets (nf_ct_set_tcp_be_liberal()).  Such TCP
> >    packets will not be marked as invalid.
> >
> > There are still certain scenarios where a packet can be marked
> > invalid.  Like
> >  • If the tcp flags are not correct.
> >
> >  • If there are checksum errors.
> >
> > In such cases, the packet will be delivered to the destination
> > port.  So CMS should evaluate their usecases before enabling
> > this option.
> >
> > Signed-off-by: Numan Siddique <numans@ovn.org>
>
> Thanks for writing dldog code!  I have a few comments.
>
> This looks correct:
>
>     function can_use_ct_inv_match(options: Map<string,string>): bool {
>         map_get_bool_def(options, "use_ct_inv_match", true)
>     }
>
>     relation UseCtInvMatch(use_ct_inv_match: bool)
>     UseCtInvMatch(can_use_ct_inv_match(options)) :-
>         nb::NB_Global(._uuid = uuid, .options = options).
>
> I would make a few stylistic changes if I were writing it.  I would drop
> the _uuid match because it is not used for anything.  I would use a
> relation that just contains a bool, instead of a struct that contains a
> bool, because then there are fewer names to invent.  I would usually
> drop the function, since it is only used in one place and it is a
> one-liner.  So I'd end up with this:
>
>     relation UseCtInvMatch[bool]
>     UseCtInvMatch[use_ct_inv_match] :-
>         nb::NB_Global(.options = options),
>         var use_ct_inv_match = map_get_bool_def(options, "use_ct_inv_match", true).
>
> Or possibly I'd keep the map_get_bool_def(...) expression inside the
> brackets, I haven't decided what's really the best way:
>
>     relation UseCtInvMatch[bool]
>     UseCtInvMatch[map_get_bool_def(options, "use_ct_inv_match", true)] :-
>         nb::NB_Global(.options = options).
>
> I'd also add a rule to handle the case where NB_Global doesn't exist.
> This is really out of paranoia.  I don't know whether this can really
> happen in practice, but it costs only a few lines and avoids a problem
> if it does ever somehow happen:
>
>     UseCtInvMatch[true] :-
>         Unit(),
>         not nb in nb::NB_Global().
>
> I noticed is that this global feature flag is getting copied inside
> every Switch.  That wastes a little memory and I do not know of a
> benefit.  So I'd drop it from Switch and join with UseCtInvMatch in the
> one place where it's needed.
>
> Finally, there's are lots of redundant strings in ovn_northd.dl.  We can
> make that better with a little parameterization, something like this:
>     @@ -2290,10 +2290,15 @@ for (Reject(lsuuid, pipeline, stage, acl, fair_meter, extra_match_, extra_action
>               .actions          = actions,
>               .external_ids     = stage_hint(acl._uuid))
>      }
>
>      /* build_acls */
>     +for (UseCtInvMatch[use_ct_inv_match]) {
>     +    (var ct_inv_or, var and_not_ct_inv) = match (use_ct_inv_match) {
>     +        true -> ("ct.inv || ", "&& !ct.inv "),
>     +        false -> ("", ""),
>     +    } in
>          for (sw in &Switch(.ls = ls))
>          var has_stateful = sw.has_stateful_acl or sw.has_lb_vip in
>          {
>              /* Ingress and Egress ACL Table (Priority 0): Packets are allowed by
>               * default.  A related rule at priority 1 is added below if there
>     @@ -2354,17 +2359,17 @@ var has_stateful = sw.has_stateful_acl or sw.has_lb_vip in
>                   *
>                   * This is enforced at a higher priority than ACLs can be defined. */
>                  Flow(.logical_datapath = ls._uuid,
>                       .stage            = switch_stage(IN, ACL),
>                       .priority         = 65535,
>     -             .__match          = "ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)",
>     +                 .__match          = ct_inv_or ++ "(ct.est && ct.rpl && ct_label.blocked == 1)",
>                       .actions          = "drop;",
>                       .external_ids     = map_empty());
>                  Flow(.logical_datapath = ls._uuid,
>                       .stage            = switch_stage(OUT, ACL),
>                       .priority         = 65535,
>     -             .__match          = "ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)",
>     +                 .__match          = ct_inv_or ++ "(ct.est && ct.rpl && ct_label.blocked == 1)",
>                       .actions          = "drop;",
>                       .external_ids     = map_empty());
>
>                  /* Ingress and Egress ACL Table (Priority 65535).
>                   *
>
> When I put all that together, I get the following replacement overall
> patch.  I haven't tested it, so maybe I made some mistakes, but it did
> compile.  The change to ovn_northd.dl looks big but most of it is
> reindentation; if you do a "git show -b" after applying it you'll see
> the real changes.
>
> Thanks,

Thanks for all the suggestions.  I will apply those changes and test it out.

I knew that copying the global flag to each switch is redundant and a waste
of memory, but I couldn't figure out a better way.  Now it makes all sense.

Regards
Numan

>
> Ben.
>
> -8<--------------------------cut here-------------------------->8--
>
> From: Numan Siddique <numans@ovn.org>
> Date: Mon, 22 Mar 2021 16:29:11 +0530
> Subject: [PATCH ovn] northd: Provide the option to not use ct.inv in lflows.
> MIME-Version: 1.0
> Content-Type: text/plain; charset=UTF-8
> Content-Transfer-Encoding: 8bit
>
> Presently we add 65535 priority lflows in the stages -
> 'ls_in_acl' and 'ls_out_acl' to drop packets which
> match on 'ct.inv'.
>
> This patch provides an option to not use this field in the
> logical flow matches for the following
> reasons:
>
>  • Some of the smart NICs which support offloading datapath
>    flows may not support this field.  In such cases, almost
>    all the datapath flows cannot be offloaded if ACLs/load balancers
>    are configured on the logical switch datapath.
>
>  • A recent commit in kernel ovs datapath sets the committed
>    connection tracking entry to be liberal for out-of-window
>    tcp packets (nf_ct_set_tcp_be_liberal()).  Such TCP
>    packets will not be marked as invalid.
>
> There are still certain scenarios where a packet can be marked
> invalid.  Like
>  • If the tcp flags are not correct.
>
>  • If there are checksum errors.
>
> In such cases, the packet will be delivered to the destination
> port.  So CMS should evaluate their usecases before enabling
> this option.
>
> Signed-off-by: Numan Siddique <numans@ovn.org>
> Signed-off-by: Ben Pfaff <blp@ovn.org>
> ---
>  NEWS                 |   5 +
>  northd/lswitch.dl    |   8 ++
>  northd/ovn-northd.c  |  41 +++---
>  northd/ovn_northd.dl | 304 ++++++++++++++++++++++---------------------
>  ovn-nb.xml           |  11 ++
>  5 files changed, 204 insertions(+), 165 deletions(-)
>
> diff --git a/NEWS b/NEWS
> index 530c5d42fe85..3181649ba822 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -7,6 +7,11 @@ Post-v21.03.0
>      (This may take testing and tuning to be effective.)  This version of OVN
>      requires DDLog 0.36.
>    - Introduce ovn-controller incremetal processing engine statistics
> +  - Add a new NB Global option - us_ct_inv_match with the default value of true.
> +    If this is set to false, then the logical field - "ct.inv" will not be
> +    used in the logical flow matches.  CMS can consider setting this to false,
> +    if they want to use smart NICs which don't support offloading datapath flows
> +    with this field used.
>
>  OVN v21.03.0 - 12 Mar 2021
>  -------------------------
> diff --git a/northd/lswitch.dl b/northd/lswitch.dl
> index 4bf8a5b907a9..db33bad52ffe 100644
> --- a/northd/lswitch.dl
> +++ b/northd/lswitch.dl
> @@ -232,6 +232,7 @@ function ipv6_parse_prefix(s: string): Option<in6_addr> {
>      LogicalSwitchLocalnetPorts(ls._uuid, localnet_ports),
>      LogicalSwitchHasNonRouterPort(ls._uuid, has_non_router_port),
>      mcast_cfg in &McastSwitchCfg(.datapath = ls._uuid),
> +    UseCtInvMatch[use_ct_inv_match],
>      var subnet =
>          match (ls.other_config.get("subnet")) {
>              None -> None,
> @@ -744,3 +745,10 @@ function put_svc_monitor_mac(options: Map<string,string>,
>  relation SvcMonitorMac(mac: eth_addr)
>  SvcMonitorMac(get_svc_monitor_mac(options, uuid)) :-
>      nb::NB_Global(._uuid = uuid, .options = options).
> +
> +relation UseCtInvMatch[bool]
> +UseCtInvMatch[map_get_bool_def(options, "use_ct_inv_match", true)] :-
> +    nb::NB_Global(.options = options).
> +UseCtInvMatch[true] :-
> +    Unit(),
> +    not nb in nb::NB_Global().
> diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
> index 57df62b9207a..f1332370eb54 100644
> --- a/northd/ovn-northd.c
> +++ b/northd/ovn-northd.c
> @@ -4068,6 +4068,10 @@ ovn_lflow_init(struct ovn_lflow *lflow, struct ovn_datapath *od,
>   * logical datapath only by creating a datapath group. */
>  static bool use_logical_dp_groups = false;
>
> +/* If this option is 'true' northd will make use of ct.inv match fields.
> + * Otherwise, it will avoid using it.  The default is true. */
> +static bool use_ct_inv_match = true;
> +
>  /* Adds a row with the specified contents to the Logical_Flow table. */
>  static void
>  ovn_lflow_add_at(struct hmap *lflow_map, struct ovn_datapath *od,
> @@ -5647,12 +5651,13 @@ build_acls(struct ovn_datapath *od, struct hmap *lflows,
>           * for deletion (bit 0 of ct_label is set).
>           *
>           * This is enforced at a higher priority than ACLs can be defined. */
> +        char *match = xasprintf("%s(ct.est && ct.rpl && ct_label.blocked == 1)",
> +                                use_ct_inv_match ? "ct.inv || " : "");
>          ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
> -                      "ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)",
> -                      "drop;");
> +                      match, "drop;");
>          ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
> -                      "ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)",
> -                      "drop;");
> +                      match, "drop;");
> +        free(match);
>
>          /* Ingress and Egress ACL Table (Priority 65535).
>           *
> @@ -5663,14 +5668,15 @@ build_acls(struct ovn_datapath *od, struct hmap *lflows,
>           * direction to hit the currently defined policy from ACLs.
>           *
>           * This is enforced at a higher priority than ACLs can be defined. */
> +        match = xasprintf("ct.est && !ct.rel && !ct.new%s && "
> +                          "ct.rpl && ct_label.blocked == 0",
> +                          use_ct_inv_match ? " && !ct.inv" : "");
> +
>          ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
> -                      "ct.est && !ct.rel && !ct.new && !ct.inv "
> -                      "&& ct.rpl && ct_label.blocked == 0",
> -                      "next;");
> +                      match, "next;");
>          ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
> -                      "ct.est && !ct.rel && !ct.new && !ct.inv "
> -                      "&& ct.rpl && ct_label.blocked == 0",
> -                      "next;");
> +                      match, "next;");
> +        free(match);
>
>          /* Ingress and Egress ACL Table (Priority 65535).
>           *
> @@ -5683,14 +5689,14 @@ build_acls(struct ovn_datapath *od, struct hmap *lflows,
>           * a dynamically negotiated FTP data channel), but will allow
>           * related traffic such as an ICMP Port Unreachable through
>           * that's generated from a non-listening UDP port.  */
> +        match = xasprintf("!ct.est && ct.rel && !ct.new%s && "
> +                          "ct_label.blocked == 0",
> +                          use_ct_inv_match ? " && !ct.inv" : "");
>          ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
> -                      "!ct.est && ct.rel && !ct.new && !ct.inv "
> -                      "&& ct_label.blocked == 0",
> -                      "next;");
> +                      match, "next;");
>          ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
> -                      "!ct.est && ct.rel && !ct.new && !ct.inv "
> -                      "&& ct_label.blocked == 0",
> -                      "next;");
> +                      match, "next;");
> +        free(match);
>
>          /* Ingress and Egress ACL Table (Priority 65535).
>           *
> @@ -12974,6 +12980,9 @@ ovnnb_db_run(struct northd_context *ctx,
>
>      use_logical_dp_groups = smap_get_bool(&nb->options,
>                                            "use_logical_dp_groups", false);
> +    use_ct_inv_match = smap_get_bool(&nb->options,
> +                                     "use_ct_inv_match", true);
> +
>      /* deprecated, use --event instead */
>      controller_event_en = smap_get_bool(&nb->options,
>                                          "controller_event", false);
> diff --git a/northd/ovn_northd.dl b/northd/ovn_northd.dl
> index 4262b83b9760..4f966e50fb7b 100644
> --- a/northd/ovn_northd.dl
> +++ b/northd/ovn_northd.dl
> @@ -2292,173 +2292,179 @@ for (Reject(lsuuid, pipeline, stage, acl, fair_meter, extra_match_, extra_action
>  }
>
>  /* build_acls */
> -for (sw in &Switch(.ls = ls))
> -var has_stateful = sw.has_stateful_acl or sw.has_lb_vip in
> -{
> -    /* Ingress and Egress ACL Table (Priority 0): Packets are allowed by
> -     * default.  A related rule at priority 1 is added below if there
> -     * are any stateful ACLs in this datapath. */
> -    Flow(.logical_datapath = ls._uuid,
> -         .stage            = switch_stage(IN, ACL),
> -         .priority         = 0,
> -         .__match          = "1",
> -         .actions          = "next;",
> -         .external_ids     = map_empty());
> -    Flow(.logical_datapath = ls._uuid,
> -         .stage            = switch_stage(OUT, ACL),
> -         .priority         = 0,
> -         .__match          = "1",
> -         .actions          = "next;",
> -         .external_ids     = map_empty());
> -
> -    if (has_stateful) {
> -        /* Ingress and Egress ACL Table (Priority 1).
> -         *
> -         * By default, traffic is allowed.  This is partially handled by
> -         * the Priority 0 ACL flows added earlier, but we also need to
> -         * commit IP flows.  This is because, while the initiater's
> -         * direction may not have any stateful rules, the server's may
> -         * and then its return traffic would not have an associated
> -         * conntrack entry and would return "+invalid".
> -         *
> -         * We use "ct_commit" for a connection that is not already known
> -         * by the connection tracker.  Once a connection is committed,
> -         * subsequent packets will hit the flow at priority 0 that just
> -         * uses "next;"
> -         *
> -         * We also check for established connections that have ct_label.blocked
> -         * set on them.  That's a connection that was disallowed, but is
> -         * now allowed by policy again since it hit this default-allow flow.
> -         * We need to set ct_label.blocked=0 to let the connection continue,
> -         * which will be done by ct_commit() in the "stateful" stage.
> -         * Subsequent packets will hit the flow at priority 0 that just
> -         * uses "next;". */
> -        Flow(.logical_datapath = ls._uuid,
> -             .stage            = switch_stage(IN, ACL),
> -             .priority         = 1,
> -             .__match          = "ip && (!ct.est || (ct.est && ct_label.blocked == 1))",
> -             .actions          = "${rEGBIT_CONNTRACK_COMMIT()} = 1; next;",
> -             .external_ids     = map_empty());
> -        Flow(.logical_datapath = ls._uuid,
> -             .stage            = switch_stage(OUT, ACL),
> -             .priority         = 1,
> -             .__match          = "ip && (!ct.est || (ct.est && ct_label.blocked == 1))",
> -             .actions          = "${rEGBIT_CONNTRACK_COMMIT()} = 1; next;",
> -             .external_ids     = map_empty());
> -
> -        /* Ingress and Egress ACL Table (Priority 65535).
> -         *
> -         * Always drop traffic that's in an invalid state.  Also drop
> -         * reply direction packets for connections that have been marked
> -         * for deletion (bit 0 of ct_label is set).
> -         *
> -         * This is enforced at a higher priority than ACLs can be defined. */
> -        Flow(.logical_datapath = ls._uuid,
> -             .stage            = switch_stage(IN, ACL),
> -             .priority         = 65535,
> -             .__match          = "ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)",
> -             .actions          = "drop;",
> -             .external_ids     = map_empty());
> -        Flow(.logical_datapath = ls._uuid,
> -             .stage            = switch_stage(OUT, ACL),
> -             .priority         = 65535,
> -             .__match          = "ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)",
> -             .actions          = "drop;",
> -             .external_ids     = map_empty());
> -
> -        /* Ingress and Egress ACL Table (Priority 65535).
> -         *
> -         * Allow reply traffic that is part of an established
> -         * conntrack entry that has not been marked for deletion
> -         * (bit 0 of ct_label).  We only match traffic in the
> -         * reply direction because we want traffic in the request
> -         * direction to hit the currently defined policy from ACLs.
> -         *
> -         * This is enforced at a higher priority than ACLs can be defined. */
> +for (UseCtInvMatch[use_ct_inv_match]) {
> +    (var ct_inv_or, var and_not_ct_inv) = match (use_ct_inv_match) {
> +        true -> ("ct.inv || ", "&& !ct.inv "),
> +        false -> ("", ""),
> +    } in
> +    for (sw in &Switch(.ls = ls))
> +    var has_stateful = sw.has_stateful_acl or sw.has_lb_vip in
> +    {
> +        /* Ingress and Egress ACL Table (Priority 0): Packets are allowed by
> +         * default.  A related rule at priority 1 is added below if there
> +         * are any stateful ACLs in this datapath. */
>          Flow(.logical_datapath = ls._uuid,
>               .stage            = switch_stage(IN, ACL),
> -             .priority         = 65535,
> -             .__match          = "ct.est && !ct.rel && !ct.new && !ct.inv "
> -                                 "&& ct.rpl && ct_label.blocked == 0",
> +             .priority         = 0,
> +             .__match          = "1",
>               .actions          = "next;",
>               .external_ids     = map_empty());
>          Flow(.logical_datapath = ls._uuid,
>               .stage            = switch_stage(OUT, ACL),
> -             .priority         = 65535,
> -             .__match          = "ct.est && !ct.rel && !ct.new && !ct.inv "
> -                                 "&& ct.rpl && ct_label.blocked == 0",
> +             .priority         = 0,
> +             .__match          = "1",
>               .actions          = "next;",
>               .external_ids     = map_empty());
>
> -        /* Ingress and Egress ACL Table (Priority 65535).
> -         *
> -         * Allow traffic that is related to an existing conntrack entry that
> -         * has not been marked for deletion (bit 0 of ct_label).
> -         *
> -         * This is enforced at a higher priority than ACLs can be defined.
> -         *
> -         * NOTE: This does not support related data sessions (eg,
> -         * a dynamically negotiated FTP data channel), but will allow
> -         * related traffic such as an ICMP Port Unreachable through
> -         * that's generated from a non-listening UDP port.  */
> -        Flow(.logical_datapath = ls._uuid,
> -             .stage            = switch_stage(IN, ACL),
> -             .priority         = 65535,
> -             .__match          = "!ct.est && ct.rel && !ct.new && !ct.inv "
> -                                 "&& ct_label.blocked == 0",
> -             .actions          = "next;",
> -             .external_ids     = map_empty());
> -        Flow(.logical_datapath = ls._uuid,
> -             .stage            = switch_stage(OUT, ACL),
> -             .priority         = 65535,
> -             .__match          = "!ct.est && ct.rel && !ct.new && !ct.inv "
> -                                 "&& ct_label.blocked == 0",
> -             .actions          = "next;",
> -             .external_ids     = map_empty());
> +        if (has_stateful) {
> +            /* Ingress and Egress ACL Table (Priority 1).
> +             *
> +             * By default, traffic is allowed.  This is partially handled by
> +             * the Priority 0 ACL flows added earlier, but we also need to
> +             * commit IP flows.  This is because, while the initiater's
> +             * direction may not have any stateful rules, the server's may
> +             * and then its return traffic would not have an associated
> +             * conntrack entry and would return "+invalid".
> +             *
> +             * We use "ct_commit" for a connection that is not already known
> +             * by the connection tracker.  Once a connection is committed,
> +             * subsequent packets will hit the flow at priority 0 that just
> +             * uses "next;"
> +             *
> +             * We also check for established connections that have ct_label.blocked
> +             * set on them.  That's a connection that was disallowed, but is
> +             * now allowed by policy again since it hit this default-allow flow.
> +             * We need to set ct_label.blocked=0 to let the connection continue,
> +             * which will be done by ct_commit() in the "stateful" stage.
> +             * Subsequent packets will hit the flow at priority 0 that just
> +             * uses "next;". */
> +            Flow(.logical_datapath = ls._uuid,
> +                 .stage            = switch_stage(IN, ACL),
> +                 .priority         = 1,
> +                 .__match          = "ip && (!ct.est || (ct.est && ct_label.blocked == 1))",
> +                 .actions          = "${rEGBIT_CONNTRACK_COMMIT()} = 1; next;",
> +                 .external_ids     = map_empty());
> +            Flow(.logical_datapath = ls._uuid,
> +                 .stage            = switch_stage(OUT, ACL),
> +                 .priority         = 1,
> +                 .__match          = "ip && (!ct.est || (ct.est && ct_label.blocked == 1))",
> +                 .actions          = "${rEGBIT_CONNTRACK_COMMIT()} = 1; next;",
> +                 .external_ids     = map_empty());
>
> -        /* Ingress and Egress ACL Table (Priority 65535).
> -         *
> -         * Not to do conntrack on ND packets. */
> +            /* Ingress and Egress ACL Table (Priority 65535).
> +             *
> +             * Always drop traffic that's in an invalid state.  Also drop
> +             * reply direction packets for connections that have been marked
> +             * for deletion (bit 0 of ct_label is set).
> +             *
> +             * This is enforced at a higher priority than ACLs can be defined. */
> +            Flow(.logical_datapath = ls._uuid,
> +                 .stage            = switch_stage(IN, ACL),
> +                 .priority         = 65535,
> +                 .__match          = ct_inv_or ++ "(ct.est && ct.rpl && ct_label.blocked == 1)",
> +                 .actions          = "drop;",
> +                 .external_ids     = map_empty());
> +            Flow(.logical_datapath = ls._uuid,
> +                 .stage            = switch_stage(OUT, ACL),
> +                 .priority         = 65535,
> +                 .__match          = ct_inv_or ++ "(ct.est && ct.rpl && ct_label.blocked == 1)",
> +                 .actions          = "drop;",
> +                 .external_ids     = map_empty());
> +
> +            /* Ingress and Egress ACL Table (Priority 65535).
> +             *
> +             * Allow reply traffic that is part of an established
> +             * conntrack entry that has not been marked for deletion
> +             * (bit 0 of ct_label).  We only match traffic in the
> +             * reply direction because we want traffic in the request
> +             * direction to hit the currently defined policy from ACLs.
> +             *
> +             * This is enforced at a higher priority than ACLs can be defined. */
> +            Flow(.logical_datapath = ls._uuid,
> +                 .stage            = switch_stage(IN, ACL),
> +                 .priority         = 65535,
> +                 .__match          = "ct.est && !ct.rel && !ct.new " ++ and_not_ct_inv ++
> +                                     "&& ct.rpl && ct_label.blocked == 0",
> +                 .actions          = "next;",
> +                 .external_ids     = map_empty());
> +            Flow(.logical_datapath = ls._uuid,
> +                 .stage            = switch_stage(OUT, ACL),
> +                 .priority         = 65535,
> +                 .__match          = "ct.est && !ct.rel && !ct.new " ++ and_not_ct_inv ++
> +                                     "&& ct.rpl && ct_label.blocked == 0",
> +                 .actions          = "next;",
> +                 .external_ids     = map_empty());
> +
> +            /* Ingress and Egress ACL Table (Priority 65535).
> +             *
> +             * Allow traffic that is related to an existing conntrack entry that
> +             * has not been marked for deletion (bit 0 of ct_label).
> +             *
> +             * This is enforced at a higher priority than ACLs can be defined.
> +             *
> +             * NOTE: This does not support related data sessions (eg,
> +             * a dynamically negotiated FTP data channel), but will allow
> +             * related traffic such as an ICMP Port Unreachable through
> +             * that's generated from a non-listening UDP port.  */
> +            Flow(.logical_datapath = ls._uuid,
> +                 .stage            = switch_stage(IN, ACL),
> +                 .priority         = 65535,
> +                 .__match          = "!ct.est && ct.rel && !ct.new " ++ and_not_ct_inv ++
> +                                     "&& ct_label.blocked == 0",
> +                 .actions          = "next;",
> +                 .external_ids     = map_empty());
> +            Flow(.logical_datapath = ls._uuid,
> +                 .stage            = switch_stage(OUT, ACL),
> +                 .priority         = 65535,
> +                 .__match          = "!ct.est && ct.rel && !ct.new " ++ and_not_ct_inv ++
> +                                     "&& ct_label.blocked == 0",
> +                 .actions          = "next;",
> +                 .external_ids     = map_empty());
> +
> +            /* Ingress and Egress ACL Table (Priority 65535).
> +             *
> +             * Not to do conntrack on ND packets. */
> +            Flow(.logical_datapath = ls._uuid,
> +                 .stage            = switch_stage(IN, ACL),
> +                 .priority         = 65535,
> +                 .__match          = "nd || nd_ra || nd_rs || mldv1 || mldv2",
> +                 .actions          = "next;",
> +                 .external_ids     = map_empty());
> +            Flow(.logical_datapath = ls._uuid,
> +                 .stage            = switch_stage(OUT, ACL),
> +                 .priority         = 65535,
> +                 .__match          = "nd || nd_ra || nd_rs || mldv1 || mldv2",
> +                 .actions          = "next;",
> +                 .external_ids     = map_empty())
> +        };
> +
> +        /* Add a 34000 priority flow to advance the DNS reply from ovn-controller,
> +         * if the CMS has configured DNS records for the datapath.
> +         */
> +        if (sw.has_dns_records) {
> +            Flow(.logical_datapath = ls._uuid,
> +                 .stage            = switch_stage(OUT, ACL),
> +                 .priority         = 34000,
> +                 .__match          = "udp.src == 53",
> +                 .actions          = if has_stateful "ct_commit; next;" else "next;",
> +                 .external_ids     = map_empty())
> +        };
> +
> +        /* Add a 34000 priority flow to advance the service monitor reply
> +         * packets to skip applying ingress ACLs. */
>          Flow(.logical_datapath = ls._uuid,
>               .stage            = switch_stage(IN, ACL),
> -             .priority         = 65535,
> -             .__match          = "nd || nd_ra || nd_rs || mldv1 || mldv2",
> +             .priority         = 34000,
> +             .__match          = "eth.dst == $svc_monitor_mac",
>               .actions          = "next;",
>               .external_ids     = map_empty());
> -        Flow(.logical_datapath = ls._uuid,
> -             .stage            = switch_stage(OUT, ACL),
> -             .priority         = 65535,
> -             .__match          = "nd || nd_ra || nd_rs || mldv1 || mldv2",
> -             .actions          = "next;",
> -             .external_ids     = map_empty())
> -    };
> -
> -    /* Add a 34000 priority flow to advance the DNS reply from ovn-controller,
> -     * if the CMS has configured DNS records for the datapath.
> -     */
> -    if (sw.has_dns_records) {
>          Flow(.logical_datapath = ls._uuid,
>               .stage            = switch_stage(OUT, ACL),
>               .priority         = 34000,
> -             .__match          = "udp.src == 53",
> -             .actions          = if has_stateful "ct_commit; next;" else "next;",
> +             .__match          = "eth.src == $svc_monitor_mac",
> +             .actions          = "next;",
>               .external_ids     = map_empty())
> -    };
> -
> -    /* Add a 34000 priority flow to advance the service monitor reply
> -     * packets to skip applying ingress ACLs. */
> -    Flow(.logical_datapath = ls._uuid,
> -         .stage            = switch_stage(IN, ACL),
> -         .priority         = 34000,
> -         .__match          = "eth.dst == $svc_monitor_mac",
> -         .actions          = "next;",
> -         .external_ids     = map_empty());
> -    Flow(.logical_datapath = ls._uuid,
> -         .stage            = switch_stage(OUT, ACL),
> -         .priority         = 34000,
> -         .__match          = "eth.src == $svc_monitor_mac",
> -         .actions          = "next;",
> -         .external_ids     = map_empty())
> +    }
>  }
>
>  /* This stage builds hints for the IN/OUT_ACL stage. Based on various
> diff --git a/ovn-nb.xml b/ovn-nb.xml
> index b0a4adffe537..c3ff3b586ad1 100644
> --- a/ovn-nb.xml
> +++ b/ovn-nb.xml
> @@ -227,6 +227,17 @@
>          </p>
>        </column>
>
> +      <column name="options" key="use_ct_inv_match">
> +        <p>
> +          If set to false, <code>ovn-northd</code> will not use the
> +          <code>ct.inv</code> field in any of the logical flow matches.
> +          The default value is true.  CMS can consider setting this to
> +          false, in case the NIC doesn't support offloading OVS datapath
> +          flows having matches <code>ct_state(..+inv..)</code> or
> +          <code>ct_state(..-inv..)</code>.
> +        </p>
> +      </column>
> +
>        <group title="Options for configuring interconnection route advertisement">
>          <p>
>            These options control how routes are advertised between OVN
> --
> 2.29.2
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
diff mbox series

Patch

diff --git a/NEWS b/NEWS
index 530c5d42fe..3181649ba8 100644
--- a/NEWS
+++ b/NEWS
@@ -7,6 +7,11 @@  Post-v21.03.0
     (This may take testing and tuning to be effective.)  This version of OVN
     requires DDLog 0.36.
   - Introduce ovn-controller incremetal processing engine statistics
+  - Add a new NB Global option - us_ct_inv_match with the default value of true.
+    If this is set to false, then the logical field - "ct.inv" will not be
+    used in the logical flow matches.  CMS can consider setting this to false,
+    if they want to use smart NICs which don't support offloading datapath flows
+    with this field used.
 
 OVN v21.03.0 - 12 Mar 2021
 -------------------------
diff --git a/northd/lswitch.dl b/northd/lswitch.dl
index 4bf8a5b907..1daf249382 100644
--- a/northd/lswitch.dl
+++ b/northd/lswitch.dl
@@ -197,6 +197,7 @@  relation &Switch(
     ipv6_prefix:       Option<in6_addr>,
     mcast_cfg:         Ref<McastSwitchCfg>,
     is_vlan_transparent: bool,
+    use_ct_inv_match:  bool,
 
     /* Does this switch have at least one port with type != "router"? */
     has_non_router_port: bool
@@ -223,7 +224,8 @@  function ipv6_parse_prefix(s: string): Option<in6_addr> {
         .ipv6_prefix       = ipv6_prefix,
         .mcast_cfg         = mcast_cfg,
         .has_non_router_port = has_non_router_port,
-        .is_vlan_transparent = is_vlan_transparent) :-
+        .is_vlan_transparent = is_vlan_transparent,
+        .use_ct_inv_match = use_ct_inv_match) :-
     nb::Logical_Switch[ls],
     LogicalSwitchHasStatefulACL(ls._uuid, has_stateful_acl),
     LogicalSwitchHasLBVIP(ls._uuid, has_lb_vip),
@@ -232,6 +234,7 @@  function ipv6_parse_prefix(s: string): Option<in6_addr> {
     LogicalSwitchLocalnetPorts(ls._uuid, localnet_ports),
     LogicalSwitchHasNonRouterPort(ls._uuid, has_non_router_port),
     mcast_cfg in &McastSwitchCfg(.datapath = ls._uuid),
+    UseCtInvMatch(use_ct_inv_match),
     var subnet =
         match (ls.other_config.get("subnet")) {
             None -> None,
@@ -744,3 +747,11 @@  function put_svc_monitor_mac(options: Map<string,string>,
 relation SvcMonitorMac(mac: eth_addr)
 SvcMonitorMac(get_svc_monitor_mac(options, uuid)) :-
     nb::NB_Global(._uuid = uuid, .options = options).
+
+function can_use_ct_inv_match(options: Map<string,string>): bool {
+    map_get_bool_def(options, "use_ct_inv_match", true)
+}
+
+relation UseCtInvMatch(use_ct_inv_match: bool)
+UseCtInvMatch(can_use_ct_inv_match(options)) :-
+    nb::NB_Global(._uuid = uuid, .options = options).
diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index 3221fb9ff7..6baed2a418 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -4066,6 +4066,10 @@  ovn_lflow_init(struct ovn_lflow *lflow, struct ovn_datapath *od,
  * logical datapath only by creating a datapath group. */
 static bool use_logical_dp_groups = false;
 
+/* If this option is 'true' northd will make use of ct.inv match fields.
+ * Otherwise, it will avoid using it.  The default is true. */
+static bool use_ct_inv_match = true;
+
 /* Adds a row with the specified contents to the Logical_Flow table. */
 static void
 ovn_lflow_add_at(struct hmap *lflow_map, struct ovn_datapath *od,
@@ -5681,12 +5685,13 @@  build_acls(struct ovn_datapath *od, struct hmap *lflows,
          * for deletion (bit 0 of ct_label is set).
          *
          * This is enforced at a higher priority than ACLs can be defined. */
+        char *match = xasprintf("%s(ct.est && ct.rpl && ct_label.blocked == 1)",
+                                use_ct_inv_match ? "ct.inv || " : "");
         ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
-                      "ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)",
-                      "drop;");
+                      match, "drop;");
         ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
-                      "ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)",
-                      "drop;");
+                      match, "drop;");
+        free(match);
 
         /* Ingress and Egress ACL Table (Priority 65535).
          *
@@ -5697,14 +5702,15 @@  build_acls(struct ovn_datapath *od, struct hmap *lflows,
          * direction to hit the currently defined policy from ACLs.
          *
          * This is enforced at a higher priority than ACLs can be defined. */
+        match = xasprintf("ct.est && !ct.rel && !ct.new%s && "
+                          "ct.rpl && ct_label.blocked == 0",
+                          use_ct_inv_match ? " && !ct.inv" : "");
+
         ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
-                      "ct.est && !ct.rel && !ct.new && !ct.inv "
-                      "&& ct.rpl && ct_label.blocked == 0",
-                      "next;");
+                      match, "next;");
         ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
-                      "ct.est && !ct.rel && !ct.new && !ct.inv "
-                      "&& ct.rpl && ct_label.blocked == 0",
-                      "next;");
+                      match, "next;");
+        free(match);
 
         /* Ingress and Egress ACL Table (Priority 65535).
          *
@@ -5717,14 +5723,14 @@  build_acls(struct ovn_datapath *od, struct hmap *lflows,
          * a dynamically negotiated FTP data channel), but will allow
          * related traffic such as an ICMP Port Unreachable through
          * that's generated from a non-listening UDP port.  */
+        match = xasprintf("!ct.est && ct.rel && !ct.new%s && "
+                          "ct_label.blocked == 0",
+                          use_ct_inv_match ? " && !ct.inv" : "");
         ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
-                      "!ct.est && ct.rel && !ct.new && !ct.inv "
-                      "&& ct_label.blocked == 0",
-                      "next;");
+                      match, "next;");
         ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
-                      "!ct.est && ct.rel && !ct.new && !ct.inv "
-                      "&& ct_label.blocked == 0",
-                      "next;");
+                      match, "next;");
+        free(match);
 
         /* Ingress and Egress ACL Table (Priority 65535).
          *
@@ -12897,6 +12903,9 @@  ovnnb_db_run(struct northd_context *ctx,
 
     use_logical_dp_groups = smap_get_bool(&nb->options,
                                           "use_logical_dp_groups", false);
+    use_ct_inv_match = smap_get_bool(&nb->options,
+                                     "use_ct_inv_match", true);
+
     /* deprecated, use --event instead */
     controller_event_en = smap_get_bool(&nb->options,
                                         "controller_event", false);
diff --git a/northd/ovn_northd.dl b/northd/ovn_northd.dl
index 35e6ab76cb..9e1919cd80 100644
--- a/northd/ovn_northd.dl
+++ b/northd/ovn_northd.dl
@@ -2396,16 +2396,25 @@  var has_stateful = sw.has_stateful_acl or sw.has_lb_vip in
          * for deletion (bit 0 of ct_label is set).
          *
          * This is enforced at a higher priority than ACLs can be defined. */
+        var __match = match (sw.use_ct_inv_match) {
+             true -> "ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)",
+             false -> "(ct.est && ct.rpl && ct_label.blocked == 1)"
+        } in
         Flow(.logical_datapath = ls._uuid,
              .stage            = switch_stage(IN, ACL),
              .priority         = 65535,
-             .__match          = "ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)",
+             .__match          = __match,
              .actions          = "drop;",
              .external_ids     = map_empty());
+
+        var __match = match (sw.use_ct_inv_match) {
+             true -> "ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)",
+             false -> "(ct.est && ct.rpl && ct_label.blocked == 1)"
+        } in
         Flow(.logical_datapath = ls._uuid,
              .stage            = switch_stage(OUT, ACL),
              .priority         = 65535,
-             .__match          = "ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)",
+             .__match          = __match,
              .actions          = "drop;",
              .external_ids     = map_empty());
 
@@ -2418,18 +2427,29 @@  var has_stateful = sw.has_stateful_acl or sw.has_lb_vip in
          * direction to hit the currently defined policy from ACLs.
          *
          * This is enforced at a higher priority than ACLs can be defined. */
+        var __match = match (sw.use_ct_inv_match) {
+             true -> "ct.est && !ct.rel && !ct.new && !ct.inv "
+                     "&& ct.rpl && ct_label.blocked == 0",
+             false -> "ct.est && !ct.rel && !ct.new "
+                      "&& ct.rpl && ct_label.blocked == 0"
+        } in
         Flow(.logical_datapath = ls._uuid,
              .stage            = switch_stage(IN, ACL),
              .priority         = 65535,
-             .__match          = "ct.est && !ct.rel && !ct.new && !ct.inv "
-                                 "&& ct.rpl && ct_label.blocked == 0",
+             .__match          = __match,
              .actions          = "next;",
              .external_ids     = map_empty());
+
+        var __match = match (sw.use_ct_inv_match) {
+             true -> "ct.est && !ct.rel && !ct.new && !ct.inv "
+                     "&& ct.rpl && ct_label.blocked == 0",
+             false -> "ct.est && !ct.rel && !ct.new "
+                      "&& ct.rpl && ct_label.blocked == 0"
+        } in
         Flow(.logical_datapath = ls._uuid,
              .stage            = switch_stage(OUT, ACL),
              .priority         = 65535,
-             .__match          = "ct.est && !ct.rel && !ct.new && !ct.inv "
-                                 "&& ct.rpl && ct_label.blocked == 0",
+             .__match          = __match,
              .actions          = "next;",
              .external_ids     = map_empty());
 
@@ -2444,18 +2464,29 @@  var has_stateful = sw.has_stateful_acl or sw.has_lb_vip in
          * a dynamically negotiated FTP data channel), but will allow
          * related traffic such as an ICMP Port Unreachable through
          * that's generated from a non-listening UDP port.  */
+        var __match = match (sw.use_ct_inv_match) {
+             true -> "!ct.est && ct.rel && !ct.new && !ct.inv "
+                     "&& ct_label.blocked == 0",
+             false -> "!ct.est && ct.rel && !ct.new "
+                      "&& ct_label.blocked == 0",
+        } in
         Flow(.logical_datapath = ls._uuid,
              .stage            = switch_stage(IN, ACL),
              .priority         = 65535,
-             .__match          = "!ct.est && ct.rel && !ct.new && !ct.inv "
-                                 "&& ct_label.blocked == 0",
+             .__match          = __match,
              .actions          = "next;",
              .external_ids     = map_empty());
+
+        var __match = match (sw.use_ct_inv_match) {
+             true -> "!ct.est && ct.rel && !ct.new && !ct.inv "
+                     "&& ct_label.blocked == 0",
+             false -> "!ct.est && ct.rel && !ct.new "
+                      "&& ct_label.blocked == 0",
+        } in
         Flow(.logical_datapath = ls._uuid,
              .stage            = switch_stage(OUT, ACL),
              .priority         = 65535,
-             .__match          = "!ct.est && ct.rel && !ct.new && !ct.inv "
-                                 "&& ct_label.blocked == 0",
+             .__match          = __match,
              .actions          = "next;",
              .external_ids     = map_empty());
 
diff --git a/ovn-nb.xml b/ovn-nb.xml
index b0a4adffe5..c3ff3b586a 100644
--- a/ovn-nb.xml
+++ b/ovn-nb.xml
@@ -227,6 +227,17 @@ 
         </p>
       </column>
 
+      <column name="options" key="use_ct_inv_match">
+        <p>
+          If set to false, <code>ovn-northd</code> will not use the
+          <code>ct.inv</code> field in any of the logical flow matches.
+          The default value is true.  CMS can consider setting this to
+          false, in case the NIC doesn't support offloading OVS datapath
+          flows having matches <code>ct_state(..+inv..)</code> or
+          <code>ct_state(..-inv..)</code>.
+        </p>
+      </column>
+
       <group title="Options for configuring interconnection route advertisement">
         <p>
           These options control how routes are advertised between OVN
diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
index 32f956a797..3a3a443a4e 100644
--- a/tests/ovn-northd.at
+++ b/tests/ovn-northd.at
@@ -3066,3 +3066,80 @@  AT_CHECK([grep "ls_out_stateful" sw0flows | sort], [0], [dnl
 
 AT_CLEANUP 
 ])
+
+OVN_FOR_EACH_NORTHD([
+AT_SETUP([ovn -- ct.inv usage])
+ovn_start
+
+check ovn-nbctl ls-add sw0
+check ovn-nbctl lsp-add sw0 sw0p1
+
+check ovn-nbctl --wait=sb acl-add sw0 to-lport 1002 ip allow-related
+
+ovn-sbctl dump-flows sw0 > sw0flows
+AT_CAPTURE_FILE([sw0flows])
+
+AT_CHECK([grep -w "ls_in_acl" sw0flows | grep 65535 | sort], [0], [dnl
+  table=9 (ls_in_acl          ), priority=65535, match=(!ct.est && ct.rel && !ct.new && !ct.inv && ct_label.blocked == 0), action=(next;)
+  table=9 (ls_in_acl          ), priority=65535, match=(ct.est && !ct.rel && !ct.new && !ct.inv && ct.rpl && ct_label.blocked == 0), action=(next;)
+  table=9 (ls_in_acl          ), priority=65535, match=(ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)), action=(drop;)
+  table=9 (ls_in_acl          ), priority=65535, match=(nd || nd_ra || nd_rs || mldv1 || mldv2), action=(next;)
+])
+
+AT_CHECK([grep -w "ls_out_acl" sw0flows | grep 65535 | sort], [0], [dnl
+  table=4 (ls_out_acl         ), priority=65535, match=(!ct.est && ct.rel && !ct.new && !ct.inv && ct_label.blocked == 0), action=(next;)
+  table=4 (ls_out_acl         ), priority=65535, match=(ct.est && !ct.rel && !ct.new && !ct.inv && ct.rpl && ct_label.blocked == 0), action=(next;)
+  table=4 (ls_out_acl         ), priority=65535, match=(ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)), action=(drop;)
+  table=4 (ls_out_acl         ), priority=65535, match=(nd || nd_ra || nd_rs || mldv1 || mldv2), action=(next;)
+])
+
+# Disable ct.inv usage.
+check ovn-nbctl --wait=sb set NB_Global . options:use_ct_inv_match=false
+
+ovn-sbctl dump-flows sw0 > sw0flows
+AT_CAPTURE_FILE([sw0flows])
+
+AT_CHECK([grep -w "ls_in_acl" sw0flows | grep 65535 | sort], [0], [dnl
+  table=9 (ls_in_acl          ), priority=65535, match=(!ct.est && ct.rel && !ct.new && ct_label.blocked == 0), action=(next;)
+  table=9 (ls_in_acl          ), priority=65535, match=((ct.est && ct.rpl && ct_label.blocked == 1)), action=(drop;)
+  table=9 (ls_in_acl          ), priority=65535, match=(ct.est && !ct.rel && !ct.new && ct.rpl && ct_label.blocked == 0), action=(next;)
+  table=9 (ls_in_acl          ), priority=65535, match=(nd || nd_ra || nd_rs || mldv1 || mldv2), action=(next;)
+])
+
+AT_CHECK([grep -w "ls_out_acl" sw0flows | grep 65535 | sort], [0], [dnl
+  table=4 (ls_out_acl         ), priority=65535, match=(!ct.est && ct.rel && !ct.new && ct_label.blocked == 0), action=(next;)
+  table=4 (ls_out_acl         ), priority=65535, match=((ct.est && ct.rpl && ct_label.blocked == 1)), action=(drop;)
+  table=4 (ls_out_acl         ), priority=65535, match=(ct.est && !ct.rel && !ct.new && ct.rpl && ct_label.blocked == 0), action=(next;)
+  table=4 (ls_out_acl         ), priority=65535, match=(nd || nd_ra || nd_rs || mldv1 || mldv2), action=(next;)
+])
+
+AT_CHECK([grep -c "ct.inv" sw0flows], [1], [dnl
+0
+])
+
+# Enable ct.inv usage.
+check ovn-nbctl --wait=sb set NB_Global . options:use_ct_inv_match=true
+
+ovn-sbctl dump-flows sw0 > sw0flows
+AT_CAPTURE_FILE([sw0flows])
+
+AT_CHECK([grep -w "ls_in_acl" sw0flows | grep 65535 | sort], [0], [dnl
+  table=9 (ls_in_acl          ), priority=65535, match=(!ct.est && ct.rel && !ct.new && !ct.inv && ct_label.blocked == 0), action=(next;)
+  table=9 (ls_in_acl          ), priority=65535, match=(ct.est && !ct.rel && !ct.new && !ct.inv && ct.rpl && ct_label.blocked == 0), action=(next;)
+  table=9 (ls_in_acl          ), priority=65535, match=(ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)), action=(drop;)
+  table=9 (ls_in_acl          ), priority=65535, match=(nd || nd_ra || nd_rs || mldv1 || mldv2), action=(next;)
+])
+
+AT_CHECK([grep -w "ls_out_acl" sw0flows | grep 65535 | sort], [0], [dnl
+  table=4 (ls_out_acl         ), priority=65535, match=(!ct.est && ct.rel && !ct.new && !ct.inv && ct_label.blocked == 0), action=(next;)
+  table=4 (ls_out_acl         ), priority=65535, match=(ct.est && !ct.rel && !ct.new && !ct.inv && ct.rpl && ct_label.blocked == 0), action=(next;)
+  table=4 (ls_out_acl         ), priority=65535, match=(ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)), action=(drop;)
+  table=4 (ls_out_acl         ), priority=65535, match=(nd || nd_ra || nd_rs || mldv1 || mldv2), action=(next;)
+])
+
+AT_CHECK([grep -c "ct.inv" sw0flows], [0], [dnl
+6
+])
+
+AT_CLEANUP
+])