diff mbox series

[ovs-dev,RFC,1/2] dpif: Add support for CT flush with partial tuple

Message ID 20221006062559.7876-1-amusil@redhat.com
State RFC
Headers show
Series [ovs-dev,RFC,1/2] dpif: Add support for CT flush with partial tuple | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test success github build: passed
ovsrobot/intel-ovs-compilation success test: success

Commit Message

Ales Musil Oct. 6, 2022, 6:25 a.m. UTC
Curreently in order to flush conntrack you would need to
specify full 5-tuple. Add support for partial match
it still has some limitations however it is capable of flushing
all that match specified field e.g. source ip address.

Reported-at: https://bugzilla.redhat.com/2120546
Signed-off-by: Ales Musil <amusil@redhat.com>
---
 NEWS                    |   2 +
 lib/ct-dpif.c           | 178 +++++++++++++++++++++++++++++++---------
 lib/dpctl.man           |   3 +-
 tests/system-traffic.at |  84 ++++++++++++++++++-
 4 files changed, 226 insertions(+), 41 deletions(-)

Comments

Paolo Valerio Oct. 17, 2022, 4:49 p.m. UTC | #1
Hello Ales,

overall the approach is ok, the only concern is that, unless I'm missing
something, in case of many connections, the exact match deletion could
potentially take a while, whereas in the previous case the cost
was basically a lookup (constant time) and of course the remaining
deletion operation.

It would be nice to avoid the extra cost when the whole 5-tuple is
specified. WDYT?

Ales Musil <amusil@redhat.com> writes:

> Curreently in order to flush conntrack you would need to
> specify full 5-tuple. Add support for partial match
> it still has some limitations however it is capable of flushing
> all that match specified field e.g. source ip address.
>
> Reported-at: https://bugzilla.redhat.com/2120546
> Signed-off-by: Ales Musil <amusil@redhat.com>
> ---
>  NEWS                    |   2 +
>  lib/ct-dpif.c           | 178 +++++++++++++++++++++++++++++++---------
>  lib/dpctl.man           |   3 +-
>  tests/system-traffic.at |  84 ++++++++++++++++++-
>  4 files changed, 226 insertions(+), 41 deletions(-)
>
> diff --git a/NEWS b/NEWS
> index ff77ee404..81909812e 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -23,6 +23,8 @@ Post-v3.0.0
>         bug and CVE fixes addressed since its release.
>         If a user wishes to benefit from these fixes it is recommended to use
>         DPDK 21.11.2.
> +   - ovs-dpctl and related ovs-appctl commands:
> +     * "flush-conntrack" is capable of handling partial 5-tuple.
>  
>  
>  v3.0.0 - 15 Aug 2022
> diff --git a/lib/ct-dpif.c b/lib/ct-dpif.c
> index cfc2315e3..57995f5e5 100644
> --- a/lib/ct-dpif.c
> +++ b/lib/ct-dpif.c
> @@ -18,6 +18,8 @@
>  #include "dpif-provider.h"
>  
>  #include <errno.h>
> +#include <sys/types.h>
> +#include <netinet/in.h>
>  
>  #include "ct-dpif.h"
>  #include "openvswitch/ofp-parse.h"
> @@ -109,7 +111,113 @@ ct_dpif_dump_done(struct ct_dpif_dump_state *dump)
>              ? dpif->dpif_class->ct_dump_done(dpif, dump)
>              : EOPNOTSUPP);
>  }
> -

was this intentional?
Just checking

> +
> +static inline bool
> +ct_dpif_inet_addr_cmp_partial(const union ct_dpif_inet_addr *partial,
> +                              const union ct_dpif_inet_addr *addr)
> +{
> +    if (!ipv6_is_zero(&partial->in6) &&
> +        !ipv6_addr_equals(&partial->in6, &addr->in6)) {
> +        return false;
> +    }
> +    return true;
> +}
> +
> +/* Compares the non-zero members if they match. This is usefull for clearing
> + * up all conntracks specified by a partial tuple. */
> +static inline bool
> +ct_dpif_tuple_cmp_partial(const struct ct_dpif_tuple *partial,
> +                          const struct ct_dpif_tuple *tuple)
> +{
> +    /* There is no point in continuing if both do not use the same eth type. */
> +    if (partial->l3_type != tuple->l3_type) {
> +        return false;
> +    }
> +
> +    if (partial->ip_proto && partial->ip_proto != tuple->ip_proto) {
> +        return false;
> +    }
> +
> +    if (!ct_dpif_inet_addr_cmp_partial(&partial->src, &tuple->src)) {
> +        return false;
> +    }
> +
> +    if (!ct_dpif_inet_addr_cmp_partial(&partial->dst, &tuple->dst)) {
> +        return false;
> +    }
> +
> +    if (partial->ip_proto == IPPROTO_TCP || partial->ip_proto == IPPROTO_UDP) {
> +
> +        if (partial->src_port && partial->src_port != tuple->src_port) {
> +            return false;
> +        }
> +
> +        if (partial->dst_port && partial->dst_port != tuple->dst_port) {
> +            return false;
> +        }
> +    } else if (partial->ip_proto == IPPROTO_ICMP ||
> +               partial->ip_proto == IPPROTO_ICMPV6) {
> +
> +        if (partial->icmp_id != tuple->icmp_id) {
> +            return false;
> +        }
> +
> +        if (partial->icmp_type != tuple->icmp_type) {
> +            return false;
> +        }
> +
> +        if (partial->icmp_code != tuple->icmp_code) {
> +            return false;
> +        }
> +    }
> +
> +    return true;
> +}
> +
> +static int
> +ct_dpif_flush_tuple(struct dpif *dpif, const uint16_t *zone,
> +                    const struct ct_dpif_tuple *tuple) {
> +    struct ct_dpif_dump_state *dump;
> +    struct ct_dpif_entry cte;
> +    int error;
> +    int tot_bkts;
> +
> +    if (!dpif->dpif_class->ct_flush) {
> +        return EOPNOTSUPP;
> +    }
> +
> +    if (VLOG_IS_DBG_ENABLED()) {
> +        struct ds ds = DS_EMPTY_INITIALIZER;
> +        ct_dpif_format_tuple(&ds, tuple);
> +        VLOG_DBG("%s: ct_flush: %s in zone %d", dpif_name(dpif), ds_cstr(&ds),
> +                  zone ? *zone : 0);
> +        ds_destroy(&ds);
> +    }
> +
> +    error = ct_dpif_dump_start(dpif, &dump, zone, &tot_bkts);
> +    if (error) {
> +        return error;
> +    }
> +
> +    while (!(error = ct_dpif_dump_next(dump, &cte))) {
> +        if (zone && *zone != cte.zone) {
> +            continue;
> +        }
> +
> +        if (ct_dpif_tuple_cmp_partial(tuple, &cte.tuple_orig)) {
> +            error = dpif->dpif_class->ct_flush(dpif, &cte.zone,
> +                                               &cte.tuple_orig);
> +            if (error) {
> +                break;
> +            }
> +        }
> +    }
> +    if (error == EOF) {
> +        error = 0;
> +    }
> +    ct_dpif_dump_done(dump);
> +    return error;
> +}
>  /* Flush the entries in the connection tracker used by 'dpif'.  The
>   * arguments have the following behavior:
>   *
> @@ -123,11 +231,7 @@ ct_dpif_flush(struct dpif *dpif, const uint16_t *zone,
>                const struct ct_dpif_tuple *tuple)
>  {
>      if (tuple) {
> -        struct ds ds = DS_EMPTY_INITIALIZER;
> -        ct_dpif_format_tuple(&ds, tuple);
> -        VLOG_DBG("%s: ct_flush: %s in zone %d", dpif_name(dpif), ds_cstr(&ds),
> -                                                zone ? *zone : 0);
> -        ds_destroy(&ds);
> +        return ct_dpif_flush_tuple(dpif, zone, tuple);
>      } else if (zone) {
>          VLOG_DBG("%s: ct_flush: zone %"PRIu16, dpif_name(dpif), *zone);
>      } else {
> @@ -581,6 +685,21 @@ ct_dpif_format_tcp_stat(struct ds * ds, int tcp_state, int conn_per_state)
>      ds_put_format(ds, "=%u", conn_per_state);
>  }
>  
> +static bool
> +ct_dpif_parse_tuple_ip(union ct_dpif_inet_addr *addr, char *value,
> +                       uint16_t l3_type)
> +{
> +    if (!ipv6_is_zero(&addr->in6)) {
> +        return false;
> +    }
> +    if (l3_type == AF_INET) {
> +        ip_parse(value, &addr->ip);
> +    } else {
> +        ipv6_parse(value, &addr->in6);
> +    }
> +    return true;
> +}
> +
>  /* Parses a specification of a conntrack 5-tuple from 's' into 'tuple'.
>   * Returns true on success.  Otherwise, returns false and puts the error
>   * message in 'ds'. */
> @@ -590,6 +709,8 @@ ct_dpif_parse_tuple(struct ct_dpif_tuple *tuple, const char *s, struct ds *ds)
>      char *pos, *key, *value, *copy;
>      memset(tuple, 0, sizeof *tuple);
>  
> +    tuple->l3_type = AF_INET;
> +
>      pos = copy = xstrdup(s);
>      while (ofputil_parse_key_value(&pos, &key, &value)) {
>          if (!*value) {
> @@ -597,28 +718,17 @@ ct_dpif_parse_tuple(struct ct_dpif_tuple *tuple, const char *s, struct ds *ds)
>              goto error;
>          }
>  
> -        if (!strcmp(key, "ct_nw_src") || !strcmp(key, "ct_nw_dst")) {
> -            if (tuple->l3_type && tuple->l3_type != AF_INET) {
> -                ds_put_cstr(ds, "L3 type set multiple times");
> -                goto error;
> -            } else {
> -                tuple->l3_type = AF_INET;
> -            }
> -            if (!ip_parse(value, key[6] == 's' ? &tuple->src.ip :
> -                                                 &tuple->dst.ip)) {
> -                goto error_with_msg;
> -            }
> -        } else if (!strcmp(key, "ct_ipv6_src") ||
> -                   !strcmp(key, "ct_ipv6_dst")) {
> -            if (tuple->l3_type && tuple->l3_type != AF_INET6) {
> -                ds_put_cstr(ds, "L3 type set multiple times");
> +        if (!strcmp(key, "ct_nw_src") || !strcmp(key, "ct_nw_dst") ||
> +            !strcmp(key, "ct_ipv6_src") || !strcmp(key, "ct_ipv6_dst")) {
> +            tuple->l3_type = key[6] == '6' ? AF_INET6 : AF_INET;
> +            uint8_t index = key[6] == '6' ? 8 : 6;
> +            union ct_dpif_inet_addr *addr = key[index] == 's'
> +                                            ? &tuple->src
> +                                            : &tuple->dst;
> +
> +            if (!ct_dpif_parse_tuple_ip(addr, value, tuple->l3_type)) {
> +                ds_put_format(ds, "%s is set multiple times", key);
>                  goto error;
> -            } else {
> -                tuple->l3_type = AF_INET6;
> -            }
> -            if (!ipv6_parse(value, key[8] == 's' ? &tuple->src.in6 :
> -                                                   &tuple->dst.in6)) {
> -                goto error_with_msg;
>              }
>          } else if (!strcmp(key, "ct_nw_proto")) {
>              char *err = str_to_u8(value, key, &tuple->ip_proto);
> @@ -665,17 +775,9 @@ ct_dpif_parse_tuple(struct ct_dpif_tuple *tuple, const char *s, struct ds *ds)
>          }
>      }
>  
> -    if (ipv6_is_zero(&tuple->src.in6) || ipv6_is_zero(&tuple->dst.in6) ||
> -        !tuple->ip_proto) {
> -        /* icmp_type, icmp_code, and icmp_id can be 0. */
> -        if (tuple->ip_proto != IPPROTO_ICMP &&
> -            tuple->ip_proto != IPPROTO_ICMPV6) {
> -            if (!tuple->src_port || !tuple->dst_port) {
> -                ds_put_cstr(ds, "at least one of the conntrack 5-tuple fields "
> -                                "is missing.");
> -                goto error;
> -            }
> -        }
> +    if (!tuple->ip_proto && (tuple->src_port || tuple->dst_port)) {
> +        ds_put_cstr(ds, "port is set without protocol");
> +        goto error;
>      }
>  
>      free(copy);
> diff --git a/lib/dpctl.man b/lib/dpctl.man
> index 87ea8087b..b0cabe05d 100644
> --- a/lib/dpctl.man
> +++ b/lib/dpctl.man
> @@ -312,7 +312,8 @@ If \fBzone\fR=\fIzone\fR is specified, only flushes the connections in
>  If \fIct-tuple\fR is provided, flushes the connection entry specified by
>  \fIct-tuple\fR in \fIzone\fR. The zone defaults to 0 if it is not provided.
>  The userspace connection tracker requires flushing with the original pre-NATed
> -tuple and a warning log will be otherwise generated.
> +tuple and a warning log will be otherwise generated. The tuple can be partial
> +and will remove all connections that are matching on the specified fields.
>  An example of an IPv4 ICMP \fIct-tuple\fR:
>  .IP
>  "ct_nw_src=10.1.1.1,ct_nw_dst=10.1.1.2,ct_nw_proto=1,icmp_type=8,icmp_code=0,icmp_id=10"
> diff --git a/tests/system-traffic.at b/tests/system-traffic.at
> index 731de439c..81b32f348 100644
> --- a/tests/system-traffic.at
> +++ b/tests/system-traffic.at
> @@ -2221,7 +2221,7 @@ AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "orig=.src=10\.1\.1\.1,"], [],
>  udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),reply=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1)
>  ])
>  
> -AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_src=10.1.1.2,ct_nw_dst=10.1.1.1,ct_nw_proto=17,ct_tp_src=2,ct_tp_dst=1'])
> +AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_src=10.1.1.1,ct_nw_dst=10.1.1.2,ct_nw_proto=17,ct_tp_src=1,ct_tp_dst=2'])
>  
>  AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "orig=.src=10\.1\.1\.1,"], [1], [dnl
>  ])
> @@ -2233,7 +2233,7 @@ AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "orig=.src=10\.1\.1\.2,"], [0],
>  udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5
>  ])
>  
> -AT_CHECK([ovs-appctl dpctl/flush-conntrack zone=5 'ct_nw_src=10.1.1.1,ct_nw_dst=10.1.1.2,ct_nw_proto=17,ct_tp_src=1,ct_tp_dst=2'])
> +AT_CHECK([ovs-appctl dpctl/flush-conntrack zone=5 'ct_nw_src=10.1.1.2,ct_nw_dst=10.1.1.1,ct_nw_proto=17,ct_tp_src=2,ct_tp_dst=1'])
>  
>  AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(10.1.1.2)], [0], [dnl
>  ])
> @@ -2255,6 +2255,86 @@ AT_CHECK([ovs-appctl dpctl/flush-conntrack zone=5 $ICMP_TUPLE])
>  AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "orig=.src=10\.1\.1\.2,"], [1], [dnl
>  ])
>  
> +dnl Test UDP from port 1 and 2, partial flush by src port
> +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000200080000 actions=resubmit(,0)"])
> +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=2 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101020a0101010002000100080000 actions=resubmit(,0)"])
> +
> +
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [0], [dnl
> +udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),reply=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1)
> +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5
> +])

I see a failure for check-kernel target because the entries are shown in
the reverse order. I guess a "| sort" could be needed.

> +
> +AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_proto=17,ct_tp_src=1'])
> +
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [0], [dnl
> +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5
> +])
> +
> +AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_proto=17,ct_tp_src=2'])
> +
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [1])
> +
> +dnl Test UDP from port 1 and 2, partial flush by dst port
> +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000200080000 actions=resubmit(,0)"])
> +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=2 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101020a0101010002000100080000 actions=resubmit(,0)"])
> +
> +
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [0], [dnl
> +udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),reply=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1)
> +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5
> +])
> +
> +AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_proto=17,ct_tp_dst=2'])
> +
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [0], [dnl
> +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5
> +])
> +
> +AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_proto=17,ct_tp_dst=1'])
> +
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [1])
> +
> +dnl Test UDP from port 1 and 2, partial flush by src address
> +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000200080000 actions=resubmit(,0)"])
> +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=2 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101020a0101010002000100080000 actions=resubmit(,0)"])
> +
> +
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [0], [dnl
> +udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),reply=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1)
> +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5
> +])
> +
> +AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_src=10.1.1.1'])
> +
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [0], [dnl
> +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5
> +])
> +
> +AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_src=10.1.1.2'])
> +
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [1])
> +
> +dnl Test UDP from port 1 and 2, partial flush by dst address
> +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000200080000 actions=resubmit(,0)"])
> +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=2 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101020a0101010002000100080000 actions=resubmit(,0)"])
> +
> +
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [0], [dnl
> +udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),reply=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1)
> +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5
> +])
> +
> +AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_dst=10.1.1.2'])
> +
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [0], [dnl
> +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5
> +])
> +
> +AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_dst=10.1.1.1'])
> +
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [1])
> +
>  OVS_TRAFFIC_VSWITCHD_STOP
>  AT_CLEANUP
>  
> -- 
> 2.37.2
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Ales Musil Oct. 20, 2022, 11:47 a.m. UTC | #2
On Mon, Oct 17, 2022 at 6:50 PM Paolo Valerio <pvalerio@redhat.com> wrote:

> Hello Ales,
>

Hi Paolo,
thank you for the feedback.


>
> overall the approach is ok, the only concern is that, unless I'm missing
> something, in case of many connections, the exact match deletion could
> potentially take a while, whereas in the previous case the cost
> was basically a lookup (constant time) and of course the remaining
> deletion operation.
>
> It would be nice to avoid the extra cost when the whole 5-tuple is
> specified. WDYT?
>

Ack, it will be in v1 of formal patch.


>
> Ales Musil <amusil@redhat.com> writes:
>
> > Curreently in order to flush conntrack you would need to
> > specify full 5-tuple. Add support for partial match
> > it still has some limitations however it is capable of flushing
> > all that match specified field e.g. source ip address.
> >
> > Reported-at: https://bugzilla.redhat.com/2120546
> > Signed-off-by: Ales Musil <amusil@redhat.com>
> > ---
> >  NEWS                    |   2 +
> >  lib/ct-dpif.c           | 178 +++++++++++++++++++++++++++++++---------
> >  lib/dpctl.man           |   3 +-
> >  tests/system-traffic.at |  84 ++++++++++++++++++-
> >  4 files changed, 226 insertions(+), 41 deletions(-)
> >
> > diff --git a/NEWS b/NEWS
> > index ff77ee404..81909812e 100644
> > --- a/NEWS
> > +++ b/NEWS
> > @@ -23,6 +23,8 @@ Post-v3.0.0
> >         bug and CVE fixes addressed since its release.
> >         If a user wishes to benefit from these fixes it is recommended
> to use
> >         DPDK 21.11.2.
> > +   - ovs-dpctl and related ovs-appctl commands:
> > +     * "flush-conntrack" is capable of handling partial 5-tuple.
> >
> >
> >  v3.0.0 - 15 Aug 2022
> > diff --git a/lib/ct-dpif.c b/lib/ct-dpif.c
> > index cfc2315e3..57995f5e5 100644
> > --- a/lib/ct-dpif.c
> > +++ b/lib/ct-dpif.c
> > @@ -18,6 +18,8 @@
> >  #include "dpif-provider.h"
> >
> >  #include <errno.h>
> > +#include <sys/types.h>
> > +#include <netinet/in.h>
> >
> >  #include "ct-dpif.h"
> >  #include "openvswitch/ofp-parse.h"
> > @@ -109,7 +111,113 @@ ct_dpif_dump_done(struct ct_dpif_dump_state *dump)
> >              ? dpif->dpif_class->ct_dump_done(dpif, dump)
> >              : EOPNOTSUPP);
> >  }
> > -
>
> was this intentional?
> Just checking
>
> > +
> > +static inline bool
> > +ct_dpif_inet_addr_cmp_partial(const union ct_dpif_inet_addr *partial,
> > +                              const union ct_dpif_inet_addr *addr)
> > +{
> > +    if (!ipv6_is_zero(&partial->in6) &&
> > +        !ipv6_addr_equals(&partial->in6, &addr->in6)) {
> > +        return false;
> > +    }
> > +    return true;
> > +}
> > +
> > +/* Compares the non-zero members if they match. This is usefull for
> clearing
> > + * up all conntracks specified by a partial tuple. */
> > +static inline bool
> > +ct_dpif_tuple_cmp_partial(const struct ct_dpif_tuple *partial,
> > +                          const struct ct_dpif_tuple *tuple)
> > +{
> > +    /* There is no point in continuing if both do not use the same eth
> type. */
> > +    if (partial->l3_type != tuple->l3_type) {
> > +        return false;
> > +    }
> > +
> > +    if (partial->ip_proto && partial->ip_proto != tuple->ip_proto) {
> > +        return false;
> > +    }
> > +
> > +    if (!ct_dpif_inet_addr_cmp_partial(&partial->src, &tuple->src)) {
> > +        return false;
> > +    }
> > +
> > +    if (!ct_dpif_inet_addr_cmp_partial(&partial->dst, &tuple->dst)) {
> > +        return false;
> > +    }
> > +
> > +    if (partial->ip_proto == IPPROTO_TCP || partial->ip_proto ==
> IPPROTO_UDP) {
> > +
> > +        if (partial->src_port && partial->src_port != tuple->src_port) {
> > +            return false;
> > +        }
> > +
> > +        if (partial->dst_port && partial->dst_port != tuple->dst_port) {
> > +            return false;
> > +        }
> > +    } else if (partial->ip_proto == IPPROTO_ICMP ||
> > +               partial->ip_proto == IPPROTO_ICMPV6) {
> > +
> > +        if (partial->icmp_id != tuple->icmp_id) {
> > +            return false;
> > +        }
> > +
> > +        if (partial->icmp_type != tuple->icmp_type) {
> > +            return false;
> > +        }
> > +
> > +        if (partial->icmp_code != tuple->icmp_code) {
> > +            return false;
> > +        }
> > +    }
> > +
> > +    return true;
> > +}
> > +
> > +static int
> > +ct_dpif_flush_tuple(struct dpif *dpif, const uint16_t *zone,
> > +                    const struct ct_dpif_tuple *tuple) {
> > +    struct ct_dpif_dump_state *dump;
> > +    struct ct_dpif_entry cte;
> > +    int error;
> > +    int tot_bkts;
> > +
> > +    if (!dpif->dpif_class->ct_flush) {
> > +        return EOPNOTSUPP;
> > +    }
> > +
> > +    if (VLOG_IS_DBG_ENABLED()) {
> > +        struct ds ds = DS_EMPTY_INITIALIZER;
> > +        ct_dpif_format_tuple(&ds, tuple);
> > +        VLOG_DBG("%s: ct_flush: %s in zone %d", dpif_name(dpif),
> ds_cstr(&ds),
> > +                  zone ? *zone : 0);
> > +        ds_destroy(&ds);
> > +    }
> > +
> > +    error = ct_dpif_dump_start(dpif, &dump, zone, &tot_bkts);
> > +    if (error) {
> > +        return error;
> > +    }
> > +
> > +    while (!(error = ct_dpif_dump_next(dump, &cte))) {
> > +        if (zone && *zone != cte.zone) {
> > +            continue;
> > +        }
> > +
> > +        if (ct_dpif_tuple_cmp_partial(tuple, &cte.tuple_orig)) {
> > +            error = dpif->dpif_class->ct_flush(dpif, &cte.zone,
> > +                                               &cte.tuple_orig);
> > +            if (error) {
> > +                break;
> > +            }
> > +        }
> > +    }
> > +    if (error == EOF) {
> > +        error = 0;
> > +    }
> > +    ct_dpif_dump_done(dump);
> > +    return error;
> > +}
> >  /* Flush the entries in the connection tracker used by 'dpif'.  The
> >   * arguments have the following behavior:
> >   *
> > @@ -123,11 +231,7 @@ ct_dpif_flush(struct dpif *dpif, const uint16_t
> *zone,
> >                const struct ct_dpif_tuple *tuple)
> >  {
> >      if (tuple) {
> > -        struct ds ds = DS_EMPTY_INITIALIZER;
> > -        ct_dpif_format_tuple(&ds, tuple);
> > -        VLOG_DBG("%s: ct_flush: %s in zone %d", dpif_name(dpif),
> ds_cstr(&ds),
> > -                                                zone ? *zone : 0);
> > -        ds_destroy(&ds);
> > +        return ct_dpif_flush_tuple(dpif, zone, tuple);
> >      } else if (zone) {
> >          VLOG_DBG("%s: ct_flush: zone %"PRIu16, dpif_name(dpif), *zone);
> >      } else {
> > @@ -581,6 +685,21 @@ ct_dpif_format_tcp_stat(struct ds * ds, int
> tcp_state, int conn_per_state)
> >      ds_put_format(ds, "=%u", conn_per_state);
> >  }
> >
> > +static bool
> > +ct_dpif_parse_tuple_ip(union ct_dpif_inet_addr *addr, char *value,
> > +                       uint16_t l3_type)
> > +{
> > +    if (!ipv6_is_zero(&addr->in6)) {
> > +        return false;
> > +    }
> > +    if (l3_type == AF_INET) {
> > +        ip_parse(value, &addr->ip);
> > +    } else {
> > +        ipv6_parse(value, &addr->in6);
> > +    }
> > +    return true;
> > +}
> > +
> >  /* Parses a specification of a conntrack 5-tuple from 's' into 'tuple'.
> >   * Returns true on success.  Otherwise, returns false and puts the error
> >   * message in 'ds'. */
> > @@ -590,6 +709,8 @@ ct_dpif_parse_tuple(struct ct_dpif_tuple *tuple,
> const char *s, struct ds *ds)
> >      char *pos, *key, *value, *copy;
> >      memset(tuple, 0, sizeof *tuple);
> >
> > +    tuple->l3_type = AF_INET;
> > +
> >      pos = copy = xstrdup(s);
> >      while (ofputil_parse_key_value(&pos, &key, &value)) {
> >          if (!*value) {
> > @@ -597,28 +718,17 @@ ct_dpif_parse_tuple(struct ct_dpif_tuple *tuple,
> const char *s, struct ds *ds)
> >              goto error;
> >          }
> >
> > -        if (!strcmp(key, "ct_nw_src") || !strcmp(key, "ct_nw_dst")) {
> > -            if (tuple->l3_type && tuple->l3_type != AF_INET) {
> > -                ds_put_cstr(ds, "L3 type set multiple times");
> > -                goto error;
> > -            } else {
> > -                tuple->l3_type = AF_INET;
> > -            }
> > -            if (!ip_parse(value, key[6] == 's' ? &tuple->src.ip :
> > -                                                 &tuple->dst.ip)) {
> > -                goto error_with_msg;
> > -            }
> > -        } else if (!strcmp(key, "ct_ipv6_src") ||
> > -                   !strcmp(key, "ct_ipv6_dst")) {
> > -            if (tuple->l3_type && tuple->l3_type != AF_INET6) {
> > -                ds_put_cstr(ds, "L3 type set multiple times");
> > +        if (!strcmp(key, "ct_nw_src") || !strcmp(key, "ct_nw_dst") ||
> > +            !strcmp(key, "ct_ipv6_src") || !strcmp(key, "ct_ipv6_dst"))
> {
> > +            tuple->l3_type = key[6] == '6' ? AF_INET6 : AF_INET;
> > +            uint8_t index = key[6] == '6' ? 8 : 6;
> > +            union ct_dpif_inet_addr *addr = key[index] == 's'
> > +                                            ? &tuple->src
> > +                                            : &tuple->dst;
> > +
> > +            if (!ct_dpif_parse_tuple_ip(addr, value, tuple->l3_type)) {
> > +                ds_put_format(ds, "%s is set multiple times", key);
> >                  goto error;
> > -            } else {
> > -                tuple->l3_type = AF_INET6;
> > -            }
> > -            if (!ipv6_parse(value, key[8] == 's' ? &tuple->src.in6 :
> > -                                                   &tuple->dst.in6)) {
> > -                goto error_with_msg;
> >              }
> >          } else if (!strcmp(key, "ct_nw_proto")) {
> >              char *err = str_to_u8(value, key, &tuple->ip_proto);
> > @@ -665,17 +775,9 @@ ct_dpif_parse_tuple(struct ct_dpif_tuple *tuple,
> const char *s, struct ds *ds)
> >          }
> >      }
> >
> > -    if (ipv6_is_zero(&tuple->src.in6) || ipv6_is_zero(&tuple->dst.in6)
> ||
> > -        !tuple->ip_proto) {
> > -        /* icmp_type, icmp_code, and icmp_id can be 0. */
> > -        if (tuple->ip_proto != IPPROTO_ICMP &&
> > -            tuple->ip_proto != IPPROTO_ICMPV6) {
> > -            if (!tuple->src_port || !tuple->dst_port) {
> > -                ds_put_cstr(ds, "at least one of the conntrack 5-tuple
> fields "
> > -                                "is missing.");
> > -                goto error;
> > -            }
> > -        }
> > +    if (!tuple->ip_proto && (tuple->src_port || tuple->dst_port)) {
> > +        ds_put_cstr(ds, "port is set without protocol");
> > +        goto error;
> >      }
> >
> >      free(copy);
> > diff --git a/lib/dpctl.man b/lib/dpctl.man
> > index 87ea8087b..b0cabe05d 100644
> > --- a/lib/dpctl.man
> > +++ b/lib/dpctl.man
> > @@ -312,7 +312,8 @@ If \fBzone\fR=\fIzone\fR is specified, only flushes
> the connections in
> >  If \fIct-tuple\fR is provided, flushes the connection entry specified by
> >  \fIct-tuple\fR in \fIzone\fR. The zone defaults to 0 if it is not
> provided.
> >  The userspace connection tracker requires flushing with the original
> pre-NATed
> > -tuple and a warning log will be otherwise generated.
> > +tuple and a warning log will be otherwise generated. The tuple can be
> partial
> > +and will remove all connections that are matching on the specified
> fields.
> >  An example of an IPv4 ICMP \fIct-tuple\fR:
> >  .IP
> >
> "ct_nw_src=10.1.1.1,ct_nw_dst=10.1.1.2,ct_nw_proto=1,icmp_type=8,icmp_code=0,icmp_id=10"
> > diff --git a/tests/system-traffic.at b/tests/system-traffic.at
> > index 731de439c..81b32f348 100644
> > --- a/tests/system-traffic.at
> > +++ b/tests/system-traffic.at
> > @@ -2221,7 +2221,7 @@ AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep
> "orig=.src=10\.1\.1\.1,"], [],
> >
> udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),reply=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1)
> >  ])
> >
> > -AT_CHECK([ovs-appctl dpctl/flush-conntrack
> 'ct_nw_src=10.1.1.2,ct_nw_dst=10.1.1.1,ct_nw_proto=17,ct_tp_src=2,ct_tp_dst=1'])
> > +AT_CHECK([ovs-appctl dpctl/flush-conntrack
> 'ct_nw_src=10.1.1.1,ct_nw_dst=10.1.1.2,ct_nw_proto=17,ct_tp_src=1,ct_tp_dst=2'])
> >
> >  AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep
> "orig=.src=10\.1\.1\.1,"], [1], [dnl
> >  ])
> > @@ -2233,7 +2233,7 @@ AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep
> "orig=.src=10\.1\.1\.2,"], [0],
> >
> udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5
> >  ])
> >
> > -AT_CHECK([ovs-appctl dpctl/flush-conntrack zone=5
> 'ct_nw_src=10.1.1.1,ct_nw_dst=10.1.1.2,ct_nw_proto=17,ct_tp_src=1,ct_tp_dst=2'])
> > +AT_CHECK([ovs-appctl dpctl/flush-conntrack zone=5
> 'ct_nw_src=10.1.1.2,ct_nw_dst=10.1.1.1,ct_nw_proto=17,ct_tp_src=2,ct_tp_dst=1'])
> >
> >  AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(10.1.1.2)], [0],
> [dnl
> >  ])
> > @@ -2255,6 +2255,86 @@ AT_CHECK([ovs-appctl dpctl/flush-conntrack zone=5
> $ICMP_TUPLE])
> >  AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep
> "orig=.src=10\.1\.1\.2,"], [1], [dnl
> >  ])
> >
> > +dnl Test UDP from port 1 and 2, partial flush by src port
> > +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1
> packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000200080000
> actions=resubmit(,0)"])
> > +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=2
> packet=50540000000a50540000000908004500001c000000000011a4cd0a0101020a0101010002000100080000
> actions=resubmit(,0)"])
> > +
> > +
> > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [0],
> [dnl
> >
> +udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),reply=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1)
> >
> +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5
> > +])
>
> I see a failure for check-kernel target because the entries are shown in
> the reverse order. I guess a "| sort" could be needed.
>

Ah right, done in v1, thanks.


>
> > +
> > +AT_CHECK([ovs-appctl dpctl/flush-conntrack
> 'ct_nw_proto=17,ct_tp_src=1'])
> > +
> > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [0],
> [dnl
> >
> +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5
> > +])
> > +
> > +AT_CHECK([ovs-appctl dpctl/flush-conntrack
> 'ct_nw_proto=17,ct_tp_src=2'])
> > +
> > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [1])
> > +
> > +dnl Test UDP from port 1 and 2, partial flush by dst port
> > +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1
> packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000200080000
> actions=resubmit(,0)"])
> > +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=2
> packet=50540000000a50540000000908004500001c000000000011a4cd0a0101020a0101010002000100080000
> actions=resubmit(,0)"])
> > +
> > +
> > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [0],
> [dnl
> >
> +udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),reply=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1)
> >
> +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5
> > +])
> > +
> > +AT_CHECK([ovs-appctl dpctl/flush-conntrack
> 'ct_nw_proto=17,ct_tp_dst=2'])
> > +
> > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [0],
> [dnl
> >
> +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5
> > +])
> > +
> > +AT_CHECK([ovs-appctl dpctl/flush-conntrack
> 'ct_nw_proto=17,ct_tp_dst=1'])
> > +
> > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [1])
> > +
> > +dnl Test UDP from port 1 and 2, partial flush by src address
> > +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1
> packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000200080000
> actions=resubmit(,0)"])
> > +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=2
> packet=50540000000a50540000000908004500001c000000000011a4cd0a0101020a0101010002000100080000
> actions=resubmit(,0)"])
> > +
> > +
> > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [0],
> [dnl
> >
> +udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),reply=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1)
> >
> +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5
> > +])
> > +
> > +AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_src=10.1.1.1'])
> > +
> > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [0],
> [dnl
> >
> +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5
> > +])
> > +
> > +AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_src=10.1.1.2'])
> > +
> > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [1])
> > +
> > +dnl Test UDP from port 1 and 2, partial flush by dst address
> > +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1
> packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000200080000
> actions=resubmit(,0)"])
> > +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=2
> packet=50540000000a50540000000908004500001c000000000011a4cd0a0101020a0101010002000100080000
> actions=resubmit(,0)"])
> > +
> > +
> > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [0],
> [dnl
> >
> +udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),reply=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1)
> >
> +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5
> > +])
> > +
> > +AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_dst=10.1.1.2'])
> > +
> > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [0],
> [dnl
> >
> +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5
> > +])
> > +
> > +AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_dst=10.1.1.1'])
> > +
> > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [1])
> > +
> >  OVS_TRAFFIC_VSWITCHD_STOP
> >  AT_CLEANUP
> >
> > --
> > 2.37.2
> >
> > _______________________________________________
> > dev mailing list
> > dev@openvswitch.org
> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
>
Thanks,
Ales
diff mbox series

Patch

diff --git a/NEWS b/NEWS
index ff77ee404..81909812e 100644
--- a/NEWS
+++ b/NEWS
@@ -23,6 +23,8 @@  Post-v3.0.0
        bug and CVE fixes addressed since its release.
        If a user wishes to benefit from these fixes it is recommended to use
        DPDK 21.11.2.
+   - ovs-dpctl and related ovs-appctl commands:
+     * "flush-conntrack" is capable of handling partial 5-tuple.
 
 
 v3.0.0 - 15 Aug 2022
diff --git a/lib/ct-dpif.c b/lib/ct-dpif.c
index cfc2315e3..57995f5e5 100644
--- a/lib/ct-dpif.c
+++ b/lib/ct-dpif.c
@@ -18,6 +18,8 @@ 
 #include "dpif-provider.h"
 
 #include <errno.h>
+#include <sys/types.h>
+#include <netinet/in.h>
 
 #include "ct-dpif.h"
 #include "openvswitch/ofp-parse.h"
@@ -109,7 +111,113 @@  ct_dpif_dump_done(struct ct_dpif_dump_state *dump)
             ? dpif->dpif_class->ct_dump_done(dpif, dump)
             : EOPNOTSUPP);
 }
-
+
+static inline bool
+ct_dpif_inet_addr_cmp_partial(const union ct_dpif_inet_addr *partial,
+                              const union ct_dpif_inet_addr *addr)
+{
+    if (!ipv6_is_zero(&partial->in6) &&
+        !ipv6_addr_equals(&partial->in6, &addr->in6)) {
+        return false;
+    }
+    return true;
+}
+
+/* Compares the non-zero members if they match. This is usefull for clearing
+ * up all conntracks specified by a partial tuple. */
+static inline bool
+ct_dpif_tuple_cmp_partial(const struct ct_dpif_tuple *partial,
+                          const struct ct_dpif_tuple *tuple)
+{
+    /* There is no point in continuing if both do not use the same eth type. */
+    if (partial->l3_type != tuple->l3_type) {
+        return false;
+    }
+
+    if (partial->ip_proto && partial->ip_proto != tuple->ip_proto) {
+        return false;
+    }
+
+    if (!ct_dpif_inet_addr_cmp_partial(&partial->src, &tuple->src)) {
+        return false;
+    }
+
+    if (!ct_dpif_inet_addr_cmp_partial(&partial->dst, &tuple->dst)) {
+        return false;
+    }
+
+    if (partial->ip_proto == IPPROTO_TCP || partial->ip_proto == IPPROTO_UDP) {
+
+        if (partial->src_port && partial->src_port != tuple->src_port) {
+            return false;
+        }
+
+        if (partial->dst_port && partial->dst_port != tuple->dst_port) {
+            return false;
+        }
+    } else if (partial->ip_proto == IPPROTO_ICMP ||
+               partial->ip_proto == IPPROTO_ICMPV6) {
+
+        if (partial->icmp_id != tuple->icmp_id) {
+            return false;
+        }
+
+        if (partial->icmp_type != tuple->icmp_type) {
+            return false;
+        }
+
+        if (partial->icmp_code != tuple->icmp_code) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+static int
+ct_dpif_flush_tuple(struct dpif *dpif, const uint16_t *zone,
+                    const struct ct_dpif_tuple *tuple) {
+    struct ct_dpif_dump_state *dump;
+    struct ct_dpif_entry cte;
+    int error;
+    int tot_bkts;
+
+    if (!dpif->dpif_class->ct_flush) {
+        return EOPNOTSUPP;
+    }
+
+    if (VLOG_IS_DBG_ENABLED()) {
+        struct ds ds = DS_EMPTY_INITIALIZER;
+        ct_dpif_format_tuple(&ds, tuple);
+        VLOG_DBG("%s: ct_flush: %s in zone %d", dpif_name(dpif), ds_cstr(&ds),
+                  zone ? *zone : 0);
+        ds_destroy(&ds);
+    }
+
+    error = ct_dpif_dump_start(dpif, &dump, zone, &tot_bkts);
+    if (error) {
+        return error;
+    }
+
+    while (!(error = ct_dpif_dump_next(dump, &cte))) {
+        if (zone && *zone != cte.zone) {
+            continue;
+        }
+
+        if (ct_dpif_tuple_cmp_partial(tuple, &cte.tuple_orig)) {
+            error = dpif->dpif_class->ct_flush(dpif, &cte.zone,
+                                               &cte.tuple_orig);
+            if (error) {
+                break;
+            }
+        }
+    }
+    if (error == EOF) {
+        error = 0;
+    }
+    ct_dpif_dump_done(dump);
+    return error;
+}
 /* Flush the entries in the connection tracker used by 'dpif'.  The
  * arguments have the following behavior:
  *
@@ -123,11 +231,7 @@  ct_dpif_flush(struct dpif *dpif, const uint16_t *zone,
               const struct ct_dpif_tuple *tuple)
 {
     if (tuple) {
-        struct ds ds = DS_EMPTY_INITIALIZER;
-        ct_dpif_format_tuple(&ds, tuple);
-        VLOG_DBG("%s: ct_flush: %s in zone %d", dpif_name(dpif), ds_cstr(&ds),
-                                                zone ? *zone : 0);
-        ds_destroy(&ds);
+        return ct_dpif_flush_tuple(dpif, zone, tuple);
     } else if (zone) {
         VLOG_DBG("%s: ct_flush: zone %"PRIu16, dpif_name(dpif), *zone);
     } else {
@@ -581,6 +685,21 @@  ct_dpif_format_tcp_stat(struct ds * ds, int tcp_state, int conn_per_state)
     ds_put_format(ds, "=%u", conn_per_state);
 }
 
+static bool
+ct_dpif_parse_tuple_ip(union ct_dpif_inet_addr *addr, char *value,
+                       uint16_t l3_type)
+{
+    if (!ipv6_is_zero(&addr->in6)) {
+        return false;
+    }
+    if (l3_type == AF_INET) {
+        ip_parse(value, &addr->ip);
+    } else {
+        ipv6_parse(value, &addr->in6);
+    }
+    return true;
+}
+
 /* Parses a specification of a conntrack 5-tuple from 's' into 'tuple'.
  * Returns true on success.  Otherwise, returns false and puts the error
  * message in 'ds'. */
@@ -590,6 +709,8 @@  ct_dpif_parse_tuple(struct ct_dpif_tuple *tuple, const char *s, struct ds *ds)
     char *pos, *key, *value, *copy;
     memset(tuple, 0, sizeof *tuple);
 
+    tuple->l3_type = AF_INET;
+
     pos = copy = xstrdup(s);
     while (ofputil_parse_key_value(&pos, &key, &value)) {
         if (!*value) {
@@ -597,28 +718,17 @@  ct_dpif_parse_tuple(struct ct_dpif_tuple *tuple, const char *s, struct ds *ds)
             goto error;
         }
 
-        if (!strcmp(key, "ct_nw_src") || !strcmp(key, "ct_nw_dst")) {
-            if (tuple->l3_type && tuple->l3_type != AF_INET) {
-                ds_put_cstr(ds, "L3 type set multiple times");
-                goto error;
-            } else {
-                tuple->l3_type = AF_INET;
-            }
-            if (!ip_parse(value, key[6] == 's' ? &tuple->src.ip :
-                                                 &tuple->dst.ip)) {
-                goto error_with_msg;
-            }
-        } else if (!strcmp(key, "ct_ipv6_src") ||
-                   !strcmp(key, "ct_ipv6_dst")) {
-            if (tuple->l3_type && tuple->l3_type != AF_INET6) {
-                ds_put_cstr(ds, "L3 type set multiple times");
+        if (!strcmp(key, "ct_nw_src") || !strcmp(key, "ct_nw_dst") ||
+            !strcmp(key, "ct_ipv6_src") || !strcmp(key, "ct_ipv6_dst")) {
+            tuple->l3_type = key[6] == '6' ? AF_INET6 : AF_INET;
+            uint8_t index = key[6] == '6' ? 8 : 6;
+            union ct_dpif_inet_addr *addr = key[index] == 's'
+                                            ? &tuple->src
+                                            : &tuple->dst;
+
+            if (!ct_dpif_parse_tuple_ip(addr, value, tuple->l3_type)) {
+                ds_put_format(ds, "%s is set multiple times", key);
                 goto error;
-            } else {
-                tuple->l3_type = AF_INET6;
-            }
-            if (!ipv6_parse(value, key[8] == 's' ? &tuple->src.in6 :
-                                                   &tuple->dst.in6)) {
-                goto error_with_msg;
             }
         } else if (!strcmp(key, "ct_nw_proto")) {
             char *err = str_to_u8(value, key, &tuple->ip_proto);
@@ -665,17 +775,9 @@  ct_dpif_parse_tuple(struct ct_dpif_tuple *tuple, const char *s, struct ds *ds)
         }
     }
 
-    if (ipv6_is_zero(&tuple->src.in6) || ipv6_is_zero(&tuple->dst.in6) ||
-        !tuple->ip_proto) {
-        /* icmp_type, icmp_code, and icmp_id can be 0. */
-        if (tuple->ip_proto != IPPROTO_ICMP &&
-            tuple->ip_proto != IPPROTO_ICMPV6) {
-            if (!tuple->src_port || !tuple->dst_port) {
-                ds_put_cstr(ds, "at least one of the conntrack 5-tuple fields "
-                                "is missing.");
-                goto error;
-            }
-        }
+    if (!tuple->ip_proto && (tuple->src_port || tuple->dst_port)) {
+        ds_put_cstr(ds, "port is set without protocol");
+        goto error;
     }
 
     free(copy);
diff --git a/lib/dpctl.man b/lib/dpctl.man
index 87ea8087b..b0cabe05d 100644
--- a/lib/dpctl.man
+++ b/lib/dpctl.man
@@ -312,7 +312,8 @@  If \fBzone\fR=\fIzone\fR is specified, only flushes the connections in
 If \fIct-tuple\fR is provided, flushes the connection entry specified by
 \fIct-tuple\fR in \fIzone\fR. The zone defaults to 0 if it is not provided.
 The userspace connection tracker requires flushing with the original pre-NATed
-tuple and a warning log will be otherwise generated.
+tuple and a warning log will be otherwise generated. The tuple can be partial
+and will remove all connections that are matching on the specified fields.
 An example of an IPv4 ICMP \fIct-tuple\fR:
 .IP
 "ct_nw_src=10.1.1.1,ct_nw_dst=10.1.1.2,ct_nw_proto=1,icmp_type=8,icmp_code=0,icmp_id=10"
diff --git a/tests/system-traffic.at b/tests/system-traffic.at
index 731de439c..81b32f348 100644
--- a/tests/system-traffic.at
+++ b/tests/system-traffic.at
@@ -2221,7 +2221,7 @@  AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "orig=.src=10\.1\.1\.1,"], [],
 udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),reply=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1)
 ])
 
-AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_src=10.1.1.2,ct_nw_dst=10.1.1.1,ct_nw_proto=17,ct_tp_src=2,ct_tp_dst=1'])
+AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_src=10.1.1.1,ct_nw_dst=10.1.1.2,ct_nw_proto=17,ct_tp_src=1,ct_tp_dst=2'])
 
 AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "orig=.src=10\.1\.1\.1,"], [1], [dnl
 ])
@@ -2233,7 +2233,7 @@  AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "orig=.src=10\.1\.1\.2,"], [0],
 udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5
 ])
 
-AT_CHECK([ovs-appctl dpctl/flush-conntrack zone=5 'ct_nw_src=10.1.1.1,ct_nw_dst=10.1.1.2,ct_nw_proto=17,ct_tp_src=1,ct_tp_dst=2'])
+AT_CHECK([ovs-appctl dpctl/flush-conntrack zone=5 'ct_nw_src=10.1.1.2,ct_nw_dst=10.1.1.1,ct_nw_proto=17,ct_tp_src=2,ct_tp_dst=1'])
 
 AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(10.1.1.2)], [0], [dnl
 ])
@@ -2255,6 +2255,86 @@  AT_CHECK([ovs-appctl dpctl/flush-conntrack zone=5 $ICMP_TUPLE])
 AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "orig=.src=10\.1\.1\.2,"], [1], [dnl
 ])
 
+dnl Test UDP from port 1 and 2, partial flush by src port
+AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000200080000 actions=resubmit(,0)"])
+AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=2 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101020a0101010002000100080000 actions=resubmit(,0)"])
+
+
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [0], [dnl
+udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),reply=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1)
+udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5
+])
+
+AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_proto=17,ct_tp_src=1'])
+
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [0], [dnl
+udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5
+])
+
+AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_proto=17,ct_tp_src=2'])
+
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [1])
+
+dnl Test UDP from port 1 and 2, partial flush by dst port
+AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000200080000 actions=resubmit(,0)"])
+AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=2 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101020a0101010002000100080000 actions=resubmit(,0)"])
+
+
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [0], [dnl
+udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),reply=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1)
+udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5
+])
+
+AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_proto=17,ct_tp_dst=2'])
+
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [0], [dnl
+udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5
+])
+
+AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_proto=17,ct_tp_dst=1'])
+
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [1])
+
+dnl Test UDP from port 1 and 2, partial flush by src address
+AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000200080000 actions=resubmit(,0)"])
+AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=2 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101020a0101010002000100080000 actions=resubmit(,0)"])
+
+
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [0], [dnl
+udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),reply=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1)
+udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5
+])
+
+AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_src=10.1.1.1'])
+
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [0], [dnl
+udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5
+])
+
+AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_src=10.1.1.2'])
+
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [1])
+
+dnl Test UDP from port 1 and 2, partial flush by dst address
+AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000200080000 actions=resubmit(,0)"])
+AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=2 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101020a0101010002000100080000 actions=resubmit(,0)"])
+
+
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [0], [dnl
+udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),reply=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1)
+udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5
+])
+
+AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_dst=10.1.1.2'])
+
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [0], [dnl
+udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5
+])
+
+AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_dst=10.1.1.1'])
+
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [1])
+
 OVS_TRAFFIC_VSWITCHD_STOP
 AT_CLEANUP