diff mbox

[ovs-dev,v3,7/9] datapath-windows: Add support for dump-conntrack in datapath

Message ID 1466795009-5328-8-git-send-email-vsairam@vmware.com
State Superseded
Delegated to: Guru Shetty
Headers show

Commit Message

Sairam Venugopal June 24, 2016, 7:03 p.m. UTC
Create the methods used for dumping conntrack entries from the hyper-v
datapath to userspace by means of netfilter netlink messages. Some of the
attributes are not supported by the datapath and have been defaulted to 0.

Signed-off-by: Sairam Venugopal <vsairam@vmware.com>
Acked-by: Paul-Daniel Boca <pboca@cloudbasesolutions.com>
---
 datapath-windows/ovsext/Conntrack-tcp.c |  59 +++++
 datapath-windows/ovsext/Conntrack.c     | 447 +++++++++++++++++++++++++++++++-
 datapath-windows/ovsext/Conntrack.h     |   5 +
 datapath-windows/ovsext/Util.h          |   4 +
 4 files changed, 512 insertions(+), 3 deletions(-)

Comments

Nithin Raju June 28, 2016, 11:52 p.m. UTC | #1
Looks good but for a few comments.

Acked-by: Nithin Raju <nithin@vmware.com>

-----Original Message-----
From: dev <dev-bounces@openvswitch.org> on behalf of Sairam Venugopal
<vsairam@vmware.com>
Date: Friday, June 24, 2016 at 12:03 PM
To: "dev@openvswitch.org" <dev@openvswitch.org>
Subject: [ovs-dev] [PATCH v3 7/9] datapath-windows: Add support
for	dump-conntrack in datapath

>Create the methods used for dumping conntrack entries from the hyper-v
>datapath to userspace by means of netfilter netlink messages. Some of the
>attributes are not supported by the datapath and have been defaulted to 0.
>
>Signed-off-by: Sairam Venugopal <vsairam@vmware.com>
>Acked-by: Paul-Daniel Boca <pboca@cloudbasesolutions.com>
>---
> datapath-windows/ovsext/Conntrack-tcp.c |  59 +++++
> datapath-windows/ovsext/Conntrack.c     | 447
>+++++++++++++++++++++++++++++++-
> datapath-windows/ovsext/Conntrack.h     |   5 +
> datapath-windows/ovsext/Util.h          |   4 +
> 4 files changed, 512 insertions(+), 3 deletions(-)
>
>diff --git a/datapath-windows/ovsext/Conntrack-tcp.c
>b/datapath-windows/ovsext/Conntrack-tcp.c
>index 19925c3..1d60323 100644
>--- a/datapath-windows/ovsext/Conntrack-tcp.c
>+++ b/datapath-windows/ovsext/Conntrack-tcp.c
>@@ -523,3 +523,62 @@ OvsConntrackCreateTcpEntry(const TCPHdr *tcp,
> 
>     return &newconn->up;
> }
>+
>+static __inline uint8_t
>+OvsCtTcpPeerToProtoInfoFlags(const struct tcp_peer *peer)
>+{
>+    uint8_t res = 0;
>+
>+    if (peer->wscale & CT_WSCALE_FLAG) {
>+        res |= CT_DPIF_TCPF_WINDOW_SCALE;
>+    }
>+
>+    if (peer->wscale & CT_WSCALE_UNKNOWN) {
>+        res |= CT_DPIF_TCPF_BE_LIBERAL;
>+    }
>+
>+    return res;
>+}
>+
>+NDIS_STATUS
>+OvsCtMapTcpProtoInfoToNl(PNL_BUFFER nlBuf, OVS_CT_ENTRY *conn_)
>+{
>+    struct conn_tcp *conn = OvsCastConntrackEntryToTcpEntry(conn_);
>+    NDIS_STATUS status = NDIS_STATUS_SUCCESS;
>+    UINT32 offset = 0;
>+
>+    offset = NlMsgStartNested(nlBuf, CTA_PROTOINFO_TCP);
>+    if (!offset) {
>+        return NDIS_STATUS_FAILURE;
>+    }
>+
>+    if (!NlMsgPutTailU8(nlBuf, CTA_PROTOINFO_TCP_STATE,
>+                        conn->peer[0].state)) {
>+        status = NDIS_STATUS_FAILURE;
>+        goto done;
>+    }
>+    if (!NlMsgPutTailU8(nlBuf, CTA_PROTOINFO_TCP_WSCALE_ORIGINAL,
>+                        (conn->peer[0].wscale & CT_WSCALE_MASK))) {
>+        status = NDIS_STATUS_FAILURE;
>+        goto done;
>+    }
>+    if (!NlMsgPutTailU8(nlBuf, CTA_PROTOINFO_TCP_WSCALE_REPLY,
>+                        (conn->peer[1].wscale & CT_WSCALE_MASK))) {
>+        status = NDIS_STATUS_FAILURE;
>+        goto done;
>+    }
>+    if (!NlMsgPutTailU16(nlBuf, CTA_PROTOINFO_TCP_FLAGS_ORIGINAL,
>+                         OvsCtTcpPeerToProtoInfoFlags(&conn->peer[0]))) {
>+        status = NDIS_STATUS_FAILURE;
>+        goto done;
>+    }
>+    if (!NlMsgPutTailU16(nlBuf, CTA_PROTOINFO_TCP_FLAGS_REPLY,
>+                         OvsCtTcpPeerToProtoInfoFlags(&conn->peer[1]))) {
>+        status = NDIS_STATUS_FAILURE;
>+        goto done;
>+    }
>+
>+done:
>+    NlMsgEndNested(nlBuf, offset);
>+    return status;
>+}
>diff --git a/datapath-windows/ovsext/Conntrack.c
>b/datapath-windows/ovsext/Conntrack.c
>index 15c495d..ebfdeef 100644
>--- a/datapath-windows/ovsext/Conntrack.c
>+++ b/datapath-windows/ovsext/Conntrack.c
>@@ -24,6 +24,10 @@
> #include "PacketParser.h"
> #include "Debug.h"
> 
>+#define WINDOWS_TICK 10000000
>+#define SEC_TO_UNIX_EPOCH 11644473600LL
>+#define SEC_TO_NANOSEC 1000000000LL
>+
> typedef struct _OVS_CT_THREAD_CTX {
>     KEVENT      event;
>     PVOID       threadObject;
>@@ -34,6 +38,7 @@ KSTART_ROUTINE ovsConntrackEntryCleaner;
> static PLIST_ENTRY ovsConntrackTable;
> static OVS_CT_THREAD_CTX ctThreadCtx;
> static PNDIS_RW_LOCK_EX ovsConntrackLockObj;
>+extern POVS_SWITCH_CONTEXT gOvsSwitchContext;
> 
> /*
>  
>*-------------------------------------------------------------------------
>---
>@@ -147,11 +152,12 @@ OvsCtUpdateFlowKey(struct OvsFlowKey *key,
> }
> 
> static __inline VOID
>-OvsCtAddEntry(POVS_CT_ENTRY entry, OvsConntrackKeyLookupCtx *ctx)
>+OvsCtAddEntry(POVS_CT_ENTRY entry, OvsConntrackKeyLookupCtx *ctx, UINT64
>now)
> {
>     NdisMoveMemory(&entry->key, &ctx->key, sizeof (OVS_CT_KEY));
>     NdisMoveMemory(&entry->rev_key, &ctx->key, sizeof (OVS_CT_KEY));
>     OvsCtKeyReverse(&entry->rev_key);
>+    entry->timestampStart = now;
>     InsertHeadList(&ovsConntrackTable[ctx->hash & CT_HASH_TABLE_MASK],
>                    &entry->link);
> }
>@@ -181,7 +187,10 @@ OvsCtEntryCreate(PNET_BUFFER_LIST curNbl,
>             state |= OVS_CS_F_NEW;
>             if (commit) {
>                 entry = OvsConntrackCreateTcpEntry(tcp, curNbl,
>currentTime);
>-                OvsCtAddEntry(entry, ctx);
>+                if (!entry) {
>+                    return NULL;
>+                }
>+                OvsCtAddEntry(entry, ctx, currentTime);
>             }
> 
>             OvsCtUpdateFlowKey(key, state, ctx->key.zone, 0, NULL);
>@@ -192,7 +201,10 @@ OvsCtEntryCreate(PNET_BUFFER_LIST curNbl,
>             state |= OVS_CS_F_NEW;
>             if (commit) {
>                 entry = OvsConntrackCreateOtherEntry(currentTime);
>-                OvsCtAddEntry(entry, ctx);
>+                if (!entry) {
>+                    return NULL;
>+                }
>+                OvsCtAddEntry(entry, ctx, currentTime);
>             }
> 
>             OvsCtUpdateFlowKey(key, state, ctx->key.zone, 0, NULL);
>@@ -290,6 +302,18 @@ OvsCtKeyAreSame(OVS_CT_KEY ctxKey, OVS_CT_KEY
>entryKey)
>         (ctxKey.zone == entryKey.zone));
> }
> 
>+static __inline VOID
>+OvsCtIncrementCounters(POVS_CT_ENTRY entry, BOOLEAN reply,
>PNET_BUFFER_LIST nbl)
>+{
>+    if (reply) {
>+        entry->rev_key.byteCount+= OvsPacketLenNBL(nbl);
>+        entry->rev_key.packetCount++;
>+    } else {
>+        entry->key.byteCount += OvsPacketLenNBL(nbl);
>+        entry->key.packetCount++;
>+    }
>+}
>+
> static __inline POVS_CT_ENTRY
> OvsCtLookup(OvsConntrackKeyLookupCtx *ctx)
> {
>@@ -440,6 +464,9 @@ OvsProcessConntrackEntry(PNET_BUFFER_LIST curNbl,
>             ctx->entry = NULL;
>             entry = OvsCtEntryCreate(curNbl, key->ipKey.nwProto,
>l4Offset,
>                                      ctx, key, commit, currentTime);
>+            if (!entry) {
>+                return NULL;
>+            }
>             break;
>         }
>     }
>@@ -517,6 +544,7 @@ OvsCtExecute_(PNET_BUFFER_LIST curNbl,
>                                  key, commit, currentTime);
>     } else {
>         /* Process the entry and update CT flags */
>+        OvsCtIncrementCounters(entry, ctx.reply, curNbl);
>         entry = OvsProcessConntrackEntry(curNbl, layers->l4Offset, &ctx,
>key,
>                                          zone, commit, currentTime);
>     }
>@@ -699,3 +727,416 @@ done:
> 
>     return status;
> }
>+
>+static __inline NDIS_STATUS
>+MapIpTupleToNl(PNL_BUFFER nlBuf, OVS_CT_KEY *key)
>+{
>+    NDIS_STATUS status = NDIS_STATUS_SUCCESS;
>+    UINT32 offset = 0;
>+
>+    offset = NlMsgStartNested(nlBuf, CTA_TUPLE_IP);
>+    if (!offset) {
>+        return NDIS_STATUS_FAILURE;
>+    }
>+
>+    if (key->dl_type == ntohs(ETH_TYPE_IPV4)) {
>+        if (!NlMsgPutTailU32(nlBuf, CTA_IP_V4_SRC, key->src.addr.ipv4)) {
>+            status = NDIS_STATUS_FAILURE;
>+            goto done;
>+        }
>+        if (!NlMsgPutTailU32(nlBuf, CTA_IP_V4_DST, key->dst.addr.ipv4)) {
>+            status = NDIS_STATUS_FAILURE;
>+            goto done;
>+        }
>+    } else if (key->dl_type == ntohs(ETH_TYPE_IPV6)) {
>+        if (!NlMsgPutTailUnspec(nlBuf, CTA_IP_V6_SRC,
>+                                (PCHAR)(&key->src.addr.ipv6),
>+                                sizeof(key->src.addr.ipv6))) {
>+            status = NDIS_STATUS_FAILURE;
>+            goto done;
>+        }
>+        if (!NlMsgPutTailUnspec(nlBuf, CTA_IP_V6_DST,
>+                                (PCHAR)(&key->dst.addr.ipv6),
>+                                sizeof(key->dst.addr.ipv6))) {
>+            status = NDIS_STATUS_FAILURE;
>+            goto done;
>+        }
>+    }
>+
>+done:
>+    NlMsgEndNested(nlBuf, offset);
>+    return status;
>+}
>+
>+static __inline NDIS_STATUS
>+MapProtoTupleToNl(PNL_BUFFER nlBuf, OVS_CT_KEY *key)
>+{
>+    NDIS_STATUS status = NDIS_STATUS_SUCCESS;
>+    UINT32 offset = 0;
>+
>+    offset = NlMsgStartNested(nlBuf, CTA_TUPLE_PROTO);
>+    if (!offset) {
>+        return NDIS_STATUS_FAILURE;
>+    }
>+
>+    if (!NlMsgPutTailU8(nlBuf, CTA_PROTO_NUM, key->nw_proto)) {
>+        status = NDIS_STATUS_FAILURE;
>+        goto done;
>+    }
>+
>+    if (key->dl_type == ntohs(ETH_TYPE_IPV4)
>+        || key->dl_type == ntohs(ETH_TYPE_IPV6)) {
>+        /* ICMP and ICMPv6 Type, Code and ID are currently not tracked */
>+        if (key->nw_proto == IPPROTO_ICMP) {
>+            if (!NlMsgPutTailU16(nlBuf, CTA_PROTO_ICMP_ID, 0)) {
>+                status = NDIS_STATUS_FAILURE;
>+                goto done;
>+            }
>+            if (!NlMsgPutTailU8(nlBuf, CTA_PROTO_ICMP_TYPE, 0)) {
>+                status = NDIS_STATUS_FAILURE;
>+                goto done;
>+            }
>+            if (!NlMsgPutTailU8(nlBuf, CTA_PROTO_ICMP_CODE, 0)) {
>+                status = NDIS_STATUS_FAILURE;
>+                goto done;
>+            }
>+        } else if (key->nw_proto == IPPROTO_ICMP) {

Should be IPPROTO_ICMPv6.

>+            if (!NlMsgPutTailU16(nlBuf, CTA_PROTO_ICMPV6_ID, 0)) {
>+                status = NDIS_STATUS_FAILURE;
>+                goto done;
>+            }
>+            if (!NlMsgPutTailU8(nlBuf, CTA_PROTO_ICMPV6_TYPE, 0)) {
>+                status = NDIS_STATUS_FAILURE;
>+                goto done;
>+            }
>+            if (!NlMsgPutTailU8(nlBuf, CTA_PROTO_ICMPV6_CODE, 0)) {
>+                status = NDIS_STATUS_FAILURE;
>+                goto done;
>+            }
>+        } else {
>+            if (!NlMsgPutTailU16(nlBuf, CTA_PROTO_SRC_PORT,
>+                                 key->src.port)) {
>+                status = NDIS_STATUS_FAILURE;
>+                goto done;
>+            }
>+            if (!NlMsgPutTailU16(nlBuf, CTA_PROTO_DST_PORT,
>+                                 key->dst.port)) {
>+                status = NDIS_STATUS_FAILURE;
>+                goto done;
>+            }

Does this imply that if not icmp or icmpv6, it must be UDP or TCP? Can we
assert in that case?

>+        }
>+    }
>+
>+done:
>+    NlMsgEndNested(nlBuf, offset);
>+    return status;
>+}
>+
>+static __inline NDIS_STATUS
>+MapCtKeyTupleToNl(PNL_BUFFER nlBuf,
>+                  UINT16 tupleType,
>+                  OVS_CT_KEY *key)
>+{
>+    NDIS_STATUS status = NDIS_STATUS_SUCCESS;
>+    UINT32 offset = 0;
>+
>+    offset = NlMsgStartNested(nlBuf, tupleType);
>+    if (!offset) {
>+        return NDIS_STATUS_FAILURE;
>+    }
>+
>+    status = MapIpTupleToNl(nlBuf, key);
>+    if (status != NDIS_STATUS_SUCCESS) {
>+        goto done;
>+    }
>+
>+    status = MapProtoTupleToNl(nlBuf, key);
>+    if (status != NDIS_STATUS_SUCCESS) {
>+        goto done;
>+    }
>+
>+done:
>+    NlMsgEndNested(nlBuf, offset);
>+    return status;
>+}
>+
>+static __inline NDIS_STATUS
>+MapCtCounterToNl(PNL_BUFFER nlBuf,
>+                 UINT16 counterType,
>+                 OVS_CT_KEY *key)
>+{
>+    NDIS_STATUS status = NDIS_STATUS_SUCCESS;
>+    UINT32 offset = 0;
>+
>+    offset = NlMsgStartNested(nlBuf, counterType);
>+    if (!offset) {
>+        return NDIS_STATUS_FAILURE;
>+    }
>+
>+    if (!NlMsgPutTailU64(nlBuf, CTA_COUNTERS_PACKETS,
>+                         htonll(key->packetCount))) {
>+        status = NDIS_STATUS_FAILURE;
>+        goto done;
>+    }
>+
>+    if (!NlMsgPutTailU64(nlBuf, CTA_COUNTERS_BYTES,
>+                         htonll(key->byteCount))) {
>+        status = NDIS_STATUS_FAILURE;
>+        goto done;
>+    }
>+
>+done:
>+    NlMsgEndNested(nlBuf, offset);
>+    return status;
>+}
>+
>+/* Userspace expects system time to be Unix timestamp in Nano Seconds */
>+static __inline unsigned
>+WindowsTickToUnixSeconds(long long windowsTicks)
>+{
>+    /*
>+        Windows epoch starts 1601-01-01T00:00:00Z. It's 11644473600
>seconds
>+        before the UNIX/Linux epoch (1970-01-01T00:00:00Z). Windows
>ticks are
>+        in 100 nanoseconds
>+    */

minor: comment format is:
/*
 *
 *
 */

>+    return (unsigned)((windowsTicks / WINDOWS_TICK
>+                        - SEC_TO_UNIX_EPOCH));
>+}
>+
>+static NTSTATUS
>+OvsCreateNlMsgFromCtEntry(POVS_CT_ENTRY entry,
>+                          POVS_MESSAGE msgIn,
>+                          PVOID outBuffer,
>+                          UINT32 outBufLen,
>+                          int dpIfIndex)
>+{
>+    NL_BUFFER nlBuf;
>+    BOOLEAN ok;
>+    PNL_MSG_HDR nlMsg;
>+    UINT32 timeout;
>+    NDIS_STATUS status;
>+    UINT64 currentTime, expiration;
>+    NdisGetCurrentSystemTime((LARGE_INTEGER *)&currentTime);
>+    UINT8 nfgenFamily = 0;
>+    if (entry->key.dl_type == htons(ETH_TYPE_IPV4)) {
>+        nfgenFamily = AF_INET;
>+    } else if (entry->key.dl_type == htons(ETH_TYPE_IPV6)) {
>+        nfgenFamily = AF_INET6;
>+    }
>+
>+    NlBufInit(&nlBuf, outBuffer, outBufLen);
>+    /* Mimic netfilter */
>+    UINT16 nlmsgType = (NFNL_SUBSYS_CTNETLINK << 8 | IPCTNL_MSG_CT_NEW);
>+    ok = NlFillOvsMsgForNfGenMsg(&nlBuf, nlmsgType, NLM_F_CREATE,
>+                                 msgIn->nlMsg.nlmsgSeq,
>+                                 msgIn->nlMsg.nlmsgPid,
>+                                 nfgenFamily,
>+                                 msgIn->nfGenMsg.version,
>+                                 dpIfIndex);
>+    if (!ok) {
>+        return STATUS_INVALID_BUFFER_SIZE;
>+    }
>+
>+    status = MapCtKeyTupleToNl(&nlBuf, CTA_TUPLE_ORIG, &entry->key);
>+    if (status != NDIS_STATUS_SUCCESS) {
>+        return STATUS_UNSUCCESSFUL;
>+    }
>+
>+    status = MapCtKeyTupleToNl(&nlBuf, CTA_TUPLE_REPLY, &entry->rev_key);
>+    if (status != NDIS_STATUS_SUCCESS) {
>+        return STATUS_UNSUCCESSFUL;
>+    }
>+
>+    status = MapCtCounterToNl(&nlBuf, CTA_COUNTERS_ORIG, &entry->key);
>+    if (status != NDIS_STATUS_SUCCESS) {
>+        return STATUS_UNSUCCESSFUL;
>+    }
>+
>+    status = MapCtCounterToNl(&nlBuf, CTA_COUNTERS_REPLY,
>&entry->rev_key);
>+    if (status != NDIS_STATUS_SUCCESS) {
>+        return STATUS_UNSUCCESSFUL;
>+    }
>+
>+    if (entry->key.zone) {
>+        if (!NlMsgPutTailU16(&nlBuf, CTA_ZONE, htons(entry->key.zone))) {
>+            return STATUS_INVALID_BUFFER_SIZE;
>+        }
>+    }
>+
>+    if (entry->mark) {
>+        if (!NlMsgPutTailU32(&nlBuf, CTA_MARK, htonl(entry->mark))) {
>+            return STATUS_INVALID_BUFFER_SIZE;
>+        }
>+    }
>+
>+    if (entry->labels.ct_labels) {
>+        ok = NlMsgPutTailUnspec(&nlBuf, CTA_LABELS,
>+                                (PCHAR)(&entry->labels),
>+                                sizeof(entry->labels));
>+        if (!ok) {
>+            return STATUS_INVALID_BUFFER_SIZE;
>+        }
>+    }
>+
>+    expiration = entry->expiration - currentTime;
>+    timeout = (UINT32) ((expiration > 0) ? expiration / CT_INTERVAL_SEC:
>0);

Œexpiration¹ is a UINT64 and will always be > 0. You probably need to do:
(UINT32)expiration > 0

>+    if (timeout > 0) {
>+        if (!NlMsgPutTailU32(&nlBuf, CTA_TIMEOUT, htonl(timeout))) {
>+            return STATUS_INVALID_BUFFER_SIZE;
>+        }
>+    }
>+
>+    if (entry->key.nw_proto == IPPROTO_TCP) {
>+        /* Add ProtoInfo for TCP */
>+        UINT32 offset;
>+        offset = NlMsgStartNested(&nlBuf, CTA_PROTOINFO);
>+        if (!offset) {
>+            return NDIS_STATUS_FAILURE;
>+        }
>+
>+        status = OvsCtMapTcpProtoInfoToNl(&nlBuf, entry);
>+        if (status != NDIS_STATUS_SUCCESS) {

It may not matter in practice, but to be consistent with the other code in
the function, you probably want to close the nested attribute before
returning failure.

>+            return STATUS_UNSUCCESSFUL;
>+        }
>+
>+        NlMsgEndNested(&nlBuf, offset);
>+    }
>+
>+    /* CTA_STATUS is required but not implemented. Default to 0 */
>+    if (!NlMsgPutTailU32(&nlBuf, CTA_STATUS, 0)) {
>+        return STATUS_INVALID_BUFFER_SIZE;
>+    }
>+
>+    /* Mimic netfilter - nf_conntrack_netlink.c:
>+        static inline int ctnetlink_dump_id(struct sk_buff *skb, const
>struct nf_conn *ct) {
>+            NLA_PUT_BE32(skb, CTA_ID, htonl((unsigned long)ct));
>+            return 0;
>+        }
>+    */

minor: comment style.

>+    if(!NlMsgPutTailU32(&nlBuf, CTA_ID, htonl((UINT32) entry))) {
>+        return STATUS_INVALID_BUFFER_SIZE;
>+    }
>+
>+    if (entry->timestampStart) {
>+        UINT32 offset;
>+        offset = NlMsgStartNested(&nlBuf, CTA_TIMESTAMP);
>+        if (!offset) {
>+            return NDIS_STATUS_FAILURE;
>+        }
>+        UINT64 start;
>+        start = WindowsTickToUnixSeconds(entry->timestampStart);
>+        start = start * SEC_TO_NANOSEC;
>+        if (!NlMsgPutTailU64(&nlBuf, CTA_TIMESTAMP_START,
>htonll(start))) {

Close nested.

>+            return STATUS_INVALID_BUFFER_SIZE;
>+        }
>+
>+        NlMsgEndNested(&nlBuf, offset);
>+    }
>+
>+    nlMsg = (PNL_MSG_HDR)NlBufAt(&nlBuf, 0, 0);
>+    nlMsg->nlmsgLen = NlBufSize(&nlBuf);
>+
>+    return STATUS_SUCCESS;
>+}
>+
>+/*
>+ 
>*-------------------------------------------------------------------------
>---
>+ *  OvsCtDumpCmdHandler --
>+ *    Handler for IPCTNL_MSG_CT_GET command.
>+ *
>+ *  XXX - Try to consolidate dump handler patterns around dumpState usage
>+ *        The following dumpHandler is similar to one vport.c uses
>+ 
>*-------------------------------------------------------------------------
>---
>+*/
>+NTSTATUS
>+OvsCtDumpCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
>+                    UINT32 *replyLen)
>+{
>+    NTSTATUS rc;
>+    /* Setup Dump Start if it's OVS_WRITE_DEV_OP and return */
>+    if (usrParamsCtx->devOp == OVS_WRITE_DEV_OP) {
>+        *replyLen = 0;
>+        OvsSetupDumpStart(usrParamsCtx);
>+        return STATUS_SUCCESS;
>+    }
>+
>+    POVS_OPEN_INSTANCE instance =
>+        (POVS_OPEN_INSTANCE)usrParamsCtx->ovsInstance;
>+    POVS_MESSAGE msgIn;
>+    UINT32 i;
>+
>+    ASSERT(usrParamsCtx->devOp == OVS_READ_DEV_OP);
>+    if (instance->dumpState.ovsMsg == NULL) {
>+        ASSERT(FALSE);
>+        return STATUS_INVALID_DEVICE_STATE;
>+    }
>+
>+    /* Output buffer has been validated while validating read dev op. */
>+    ASSERT(usrParamsCtx->outputBuffer != NULL);
>+    msgIn = instance->dumpState.ovsMsg;
>+    UINT32 inBucket = instance->dumpState.index[0];
>+    UINT32 inIndex = instance->dumpState.index[1];
>+    UINT32 outIndex = 0;
>+
>+    LOCK_STATE_EX lockState;
>+    NdisAcquireRWLockRead(ovsConntrackLockObj, &lockState, 0);
>+

Even when there are no entries, we¹ll end up looping through the hash
table. Can this be optimized by keeping a count of the number of entries
and skipping the for loop on no entries.

>+    for (i = inBucket; i < CT_HASH_TABLE_SIZE; i++) {
>+        PLIST_ENTRY head, link;
>+        head = &ovsConntrackTable[i];
>+        POVS_CT_ENTRY entry = NULL;
>+
>+        outIndex = 0;
>+        LIST_FORALL(head, link) {
>+            /*
>+                * if one or more dumps were previously done on this same
>bucket,
>+                * inIndex will be > 0, so we'll need to reply with the
>+                * inIndex + 1 ct-entry from the bucket.
>+            */

minor: alignment is off.

>+            if (outIndex >= inIndex) {
>+                entry = CONTAINING_RECORD(link, OVS_CT_ENTRY, link);
>+
>+                rc = OvsCreateNlMsgFromCtEntry(entry, msgIn,
>+                 
>usrParamsCtx->outputBuffer,
>+                 
>usrParamsCtx->outputLength,
>+                                               gOvsSwitchContext->dpNo);
>+
>+                if (rc != NDIS_STATUS_SUCCESS) {
>+                    NdisReleaseRWLock(ovsConntrackLockObj, &lockState);
>+                    return STATUS_UNSUCCESSFUL;
>+                }
>+
>+                ++outIndex;
>+                break;
>+            }
>+
>+            ++outIndex;
>+        }
>+
>+        if (entry) {
>+            break;
>+        }
>+
>+        /*
>+            * if no ct-entry was found above, check the next bucket,
>beginning
>+            * with the first (i.e. index 0) elem from within that bucket

minor: alignment

>+        */
>+        inIndex = 0;
>+    }
>+
>+    instance->dumpState.index[0] = i;
>+    instance->dumpState.index[1] = outIndex;
>+    NdisReleaseRWLock(ovsConntrackLockObj, &lockState);
>+
>+    /* if i < CT_HASH_TABLE_SIZE => entry was found */
>+    if (i < CT_HASH_TABLE_SIZE) {
>+        POVS_MESSAGE msgOut = (POVS_MESSAGE)usrParamsCtx->outputBuffer;
>+        *replyLen = msgOut->nlMsg.nlmsgLen;
>+    } else {
>+        /* if i >= CT_HASH_TABLE_SIZE => entry was not found => dump
>done */
>+        *replyLen = 0;
>+        FreeUserDumpState(instance);
>+    }
>+
>+    return STATUS_SUCCESS;
>+}
>diff --git a/datapath-windows/ovsext/Conntrack.h
>b/datapath-windows/ovsext/Conntrack.h
>index 883ac57..6d573c8 100644
>--- a/datapath-windows/ovsext/Conntrack.h
>+++ b/datapath-windows/ovsext/Conntrack.h
>@@ -59,6 +59,8 @@ typedef struct _OVS_CT_KEY {
>     UINT16 dl_type;
>     UINT8 nw_proto;
>     UINT16 zone;
>+    UINT64 packetCount;
>+    UINT64 byteCount;
> } OVS_CT_KEY, *POVS_CT_KEY;
> 
> typedef struct OVS_CT_ENTRY {
>@@ -67,6 +69,7 @@ typedef struct OVS_CT_ENTRY {
>     UINT64      expiration;
>     LIST_ENTRY  link;
>     UINT32      mark;
>+    UINT64      timestampStart;
>     struct ovs_key_ct_labels labels;
> } OVS_CT_ENTRY, *POVS_CT_ENTRY;
> 
>@@ -102,6 +105,8 @@ BOOLEAN OvsConntrackValidateTcpPacket(const TCPHdr
>*tcp);
> OVS_CT_ENTRY * OvsConntrackCreateTcpEntry(const TCPHdr *tcp,
>                                           PNET_BUFFER_LIST nbl,
>                                           UINT64 now);
>+NDIS_STATUS OvsCtMapTcpProtoInfoToNl(PNL_BUFFER nlBuf,
>+                                     OVS_CT_ENTRY *conn_);
> OVS_CT_ENTRY * OvsConntrackCreateOtherEntry(UINT64 now);
> enum CT_UPDATE_RES OvsConntrackUpdateTcpEntry(OVS_CT_ENTRY* conn_,
>                                               const TCPHdr *tcp,
>diff --git a/datapath-windows/ovsext/Util.h
>b/datapath-windows/ovsext/Util.h
>index 4bcde3b..aa54322 100644
>--- a/datapath-windows/ovsext/Util.h
>+++ b/datapath-windows/ovsext/Util.h
>@@ -79,6 +79,10 @@ VOID OvsAppendList(PLIST_ENTRY dst, PLIST_ENTRY src);
> #define ntohs(_x)    _byteswap_ushort((USHORT)(_x))
> #define htonl(_x)    _byteswap_ulong((ULONG)(_x))
> #define ntohl(_x)    _byteswap_ulong((ULONG)(_x))
>+#define htonll(_x)    ((1==htonl(1)) ? (_x) : \
>+                           ((uint64_t) htonl(_x) << 32) | htonl(_x >>
>32))
>+#define ntohll(_x)    ((1==ntohl(1)) ? (_x) : \
>+                           ((uint64_t) ntohl(_x) << 32) | ntohl(_x >>
>32))
> #endif
> 
> #define OVS_INIT_OBJECT_HEADER(_obj, _type, _revision, _size) \
>-- 
>2.5.0.windows.1
>
>_______________________________________________
>dev mailing list
>dev@openvswitch.org
>https://urldefense.proofpoint.com/v2/url?u=http-3A__openvswitch.org_mailma
>n_listinfo_dev&d=CwIGaQ&c=Sqcl0Ez6M0X8aeM67LKIiDJAXVeAw-YihVMNtXt-uEs&r=pN
>HQcdr7B40b4h6Yb7FIedI1dnBsxdDuTLBYD3JqV80&m=DSUPXpsB8K13kULn_jcJAdvdDmPgix
>ZrlugijxnoELk&s=Lof3TSg87QTrohBhHqOJiwbV6Z7Y2EQ3qVONYVFjkaE&e=
diff mbox

Patch

diff --git a/datapath-windows/ovsext/Conntrack-tcp.c b/datapath-windows/ovsext/Conntrack-tcp.c
index 19925c3..1d60323 100644
--- a/datapath-windows/ovsext/Conntrack-tcp.c
+++ b/datapath-windows/ovsext/Conntrack-tcp.c
@@ -523,3 +523,62 @@  OvsConntrackCreateTcpEntry(const TCPHdr *tcp,
 
     return &newconn->up;
 }
+
+static __inline uint8_t
+OvsCtTcpPeerToProtoInfoFlags(const struct tcp_peer *peer)
+{
+    uint8_t res = 0;
+
+    if (peer->wscale & CT_WSCALE_FLAG) {
+        res |= CT_DPIF_TCPF_WINDOW_SCALE;
+    }
+
+    if (peer->wscale & CT_WSCALE_UNKNOWN) {
+        res |= CT_DPIF_TCPF_BE_LIBERAL;
+    }
+
+    return res;
+}
+
+NDIS_STATUS
+OvsCtMapTcpProtoInfoToNl(PNL_BUFFER nlBuf, OVS_CT_ENTRY *conn_)
+{
+    struct conn_tcp *conn = OvsCastConntrackEntryToTcpEntry(conn_);
+    NDIS_STATUS status = NDIS_STATUS_SUCCESS;
+    UINT32 offset = 0;
+
+    offset = NlMsgStartNested(nlBuf, CTA_PROTOINFO_TCP);
+    if (!offset) {
+        return NDIS_STATUS_FAILURE;
+    }
+
+    if (!NlMsgPutTailU8(nlBuf, CTA_PROTOINFO_TCP_STATE,
+                        conn->peer[0].state)) {
+        status = NDIS_STATUS_FAILURE;
+        goto done;
+    }
+    if (!NlMsgPutTailU8(nlBuf, CTA_PROTOINFO_TCP_WSCALE_ORIGINAL,
+                        (conn->peer[0].wscale & CT_WSCALE_MASK))) {
+        status = NDIS_STATUS_FAILURE;
+        goto done;
+    }
+    if (!NlMsgPutTailU8(nlBuf, CTA_PROTOINFO_TCP_WSCALE_REPLY,
+                        (conn->peer[1].wscale & CT_WSCALE_MASK))) {
+        status = NDIS_STATUS_FAILURE;
+        goto done;
+    }
+    if (!NlMsgPutTailU16(nlBuf, CTA_PROTOINFO_TCP_FLAGS_ORIGINAL,
+                         OvsCtTcpPeerToProtoInfoFlags(&conn->peer[0]))) {
+        status = NDIS_STATUS_FAILURE;
+        goto done;
+    }
+    if (!NlMsgPutTailU16(nlBuf, CTA_PROTOINFO_TCP_FLAGS_REPLY,
+                         OvsCtTcpPeerToProtoInfoFlags(&conn->peer[1]))) {
+        status = NDIS_STATUS_FAILURE;
+        goto done;
+    }
+
+done:
+    NlMsgEndNested(nlBuf, offset);
+    return status;
+}
diff --git a/datapath-windows/ovsext/Conntrack.c b/datapath-windows/ovsext/Conntrack.c
index 15c495d..ebfdeef 100644
--- a/datapath-windows/ovsext/Conntrack.c
+++ b/datapath-windows/ovsext/Conntrack.c
@@ -24,6 +24,10 @@ 
 #include "PacketParser.h"
 #include "Debug.h"
 
+#define WINDOWS_TICK 10000000
+#define SEC_TO_UNIX_EPOCH 11644473600LL
+#define SEC_TO_NANOSEC 1000000000LL
+
 typedef struct _OVS_CT_THREAD_CTX {
     KEVENT      event;
     PVOID       threadObject;
@@ -34,6 +38,7 @@  KSTART_ROUTINE ovsConntrackEntryCleaner;
 static PLIST_ENTRY ovsConntrackTable;
 static OVS_CT_THREAD_CTX ctThreadCtx;
 static PNDIS_RW_LOCK_EX ovsConntrackLockObj;
+extern POVS_SWITCH_CONTEXT gOvsSwitchContext;
 
 /*
  *----------------------------------------------------------------------------
@@ -147,11 +152,12 @@  OvsCtUpdateFlowKey(struct OvsFlowKey *key,
 }
 
 static __inline VOID
-OvsCtAddEntry(POVS_CT_ENTRY entry, OvsConntrackKeyLookupCtx *ctx)
+OvsCtAddEntry(POVS_CT_ENTRY entry, OvsConntrackKeyLookupCtx *ctx, UINT64 now)
 {
     NdisMoveMemory(&entry->key, &ctx->key, sizeof (OVS_CT_KEY));
     NdisMoveMemory(&entry->rev_key, &ctx->key, sizeof (OVS_CT_KEY));
     OvsCtKeyReverse(&entry->rev_key);
+    entry->timestampStart = now;
     InsertHeadList(&ovsConntrackTable[ctx->hash & CT_HASH_TABLE_MASK],
                    &entry->link);
 }
@@ -181,7 +187,10 @@  OvsCtEntryCreate(PNET_BUFFER_LIST curNbl,
             state |= OVS_CS_F_NEW;
             if (commit) {
                 entry = OvsConntrackCreateTcpEntry(tcp, curNbl, currentTime);
-                OvsCtAddEntry(entry, ctx);
+                if (!entry) {
+                    return NULL;
+                }
+                OvsCtAddEntry(entry, ctx, currentTime);
             }
 
             OvsCtUpdateFlowKey(key, state, ctx->key.zone, 0, NULL);
@@ -192,7 +201,10 @@  OvsCtEntryCreate(PNET_BUFFER_LIST curNbl,
             state |= OVS_CS_F_NEW;
             if (commit) {
                 entry = OvsConntrackCreateOtherEntry(currentTime);
-                OvsCtAddEntry(entry, ctx);
+                if (!entry) {
+                    return NULL;
+                }
+                OvsCtAddEntry(entry, ctx, currentTime);
             }
 
             OvsCtUpdateFlowKey(key, state, ctx->key.zone, 0, NULL);
@@ -290,6 +302,18 @@  OvsCtKeyAreSame(OVS_CT_KEY ctxKey, OVS_CT_KEY entryKey)
         (ctxKey.zone == entryKey.zone));
 }
 
+static __inline VOID
+OvsCtIncrementCounters(POVS_CT_ENTRY entry, BOOLEAN reply, PNET_BUFFER_LIST nbl)
+{
+    if (reply) {
+        entry->rev_key.byteCount+= OvsPacketLenNBL(nbl);
+        entry->rev_key.packetCount++;
+    } else {
+        entry->key.byteCount += OvsPacketLenNBL(nbl);
+        entry->key.packetCount++;
+    }
+}
+
 static __inline POVS_CT_ENTRY
 OvsCtLookup(OvsConntrackKeyLookupCtx *ctx)
 {
@@ -440,6 +464,9 @@  OvsProcessConntrackEntry(PNET_BUFFER_LIST curNbl,
             ctx->entry = NULL;
             entry = OvsCtEntryCreate(curNbl, key->ipKey.nwProto, l4Offset,
                                      ctx, key, commit, currentTime);
+            if (!entry) {
+                return NULL;
+            }
             break;
         }
     }
@@ -517,6 +544,7 @@  OvsCtExecute_(PNET_BUFFER_LIST curNbl,
                                  key, commit, currentTime);
     } else {
         /* Process the entry and update CT flags */
+        OvsCtIncrementCounters(entry, ctx.reply, curNbl);
         entry = OvsProcessConntrackEntry(curNbl, layers->l4Offset, &ctx, key,
                                          zone, commit, currentTime);
     }
@@ -699,3 +727,416 @@  done:
 
     return status;
 }
+
+static __inline NDIS_STATUS
+MapIpTupleToNl(PNL_BUFFER nlBuf, OVS_CT_KEY *key)
+{
+    NDIS_STATUS status = NDIS_STATUS_SUCCESS;
+    UINT32 offset = 0;
+
+    offset = NlMsgStartNested(nlBuf, CTA_TUPLE_IP);
+    if (!offset) {
+        return NDIS_STATUS_FAILURE;
+    }
+
+    if (key->dl_type == ntohs(ETH_TYPE_IPV4)) {
+        if (!NlMsgPutTailU32(nlBuf, CTA_IP_V4_SRC, key->src.addr.ipv4)) {
+            status = NDIS_STATUS_FAILURE;
+            goto done;
+        }
+        if (!NlMsgPutTailU32(nlBuf, CTA_IP_V4_DST, key->dst.addr.ipv4)) {
+            status = NDIS_STATUS_FAILURE;
+            goto done;
+        }
+    } else if (key->dl_type == ntohs(ETH_TYPE_IPV6)) {
+        if (!NlMsgPutTailUnspec(nlBuf, CTA_IP_V6_SRC,
+                                (PCHAR)(&key->src.addr.ipv6),
+                                sizeof(key->src.addr.ipv6))) {
+            status = NDIS_STATUS_FAILURE;
+            goto done;
+        }
+        if (!NlMsgPutTailUnspec(nlBuf, CTA_IP_V6_DST,
+                                (PCHAR)(&key->dst.addr.ipv6),
+                                sizeof(key->dst.addr.ipv6))) {
+            status = NDIS_STATUS_FAILURE;
+            goto done;
+        }
+    }
+
+done:
+    NlMsgEndNested(nlBuf, offset);
+    return status;
+}
+
+static __inline NDIS_STATUS
+MapProtoTupleToNl(PNL_BUFFER nlBuf, OVS_CT_KEY *key)
+{
+    NDIS_STATUS status = NDIS_STATUS_SUCCESS;
+    UINT32 offset = 0;
+
+    offset = NlMsgStartNested(nlBuf, CTA_TUPLE_PROTO);
+    if (!offset) {
+        return NDIS_STATUS_FAILURE;
+    }
+
+    if (!NlMsgPutTailU8(nlBuf, CTA_PROTO_NUM, key->nw_proto)) {
+        status = NDIS_STATUS_FAILURE;
+        goto done;
+    }
+
+    if (key->dl_type == ntohs(ETH_TYPE_IPV4)
+        || key->dl_type == ntohs(ETH_TYPE_IPV6)) {
+        /* ICMP and ICMPv6 Type, Code and ID are currently not tracked */
+        if (key->nw_proto == IPPROTO_ICMP) {
+            if (!NlMsgPutTailU16(nlBuf, CTA_PROTO_ICMP_ID, 0)) {
+                status = NDIS_STATUS_FAILURE;
+                goto done;
+            }
+            if (!NlMsgPutTailU8(nlBuf, CTA_PROTO_ICMP_TYPE, 0)) {
+                status = NDIS_STATUS_FAILURE;
+                goto done;
+            }
+            if (!NlMsgPutTailU8(nlBuf, CTA_PROTO_ICMP_CODE, 0)) {
+                status = NDIS_STATUS_FAILURE;
+                goto done;
+            }
+        } else if (key->nw_proto == IPPROTO_ICMP) {
+            if (!NlMsgPutTailU16(nlBuf, CTA_PROTO_ICMPV6_ID, 0)) {
+                status = NDIS_STATUS_FAILURE;
+                goto done;
+            }
+            if (!NlMsgPutTailU8(nlBuf, CTA_PROTO_ICMPV6_TYPE, 0)) {
+                status = NDIS_STATUS_FAILURE;
+                goto done;
+            }
+            if (!NlMsgPutTailU8(nlBuf, CTA_PROTO_ICMPV6_CODE, 0)) {
+                status = NDIS_STATUS_FAILURE;
+                goto done;
+            }
+        } else {
+            if (!NlMsgPutTailU16(nlBuf, CTA_PROTO_SRC_PORT,
+                                 key->src.port)) {
+                status = NDIS_STATUS_FAILURE;
+                goto done;
+            }
+            if (!NlMsgPutTailU16(nlBuf, CTA_PROTO_DST_PORT,
+                                 key->dst.port)) {
+                status = NDIS_STATUS_FAILURE;
+                goto done;
+            }
+        }
+    }
+
+done:
+    NlMsgEndNested(nlBuf, offset);
+    return status;
+}
+
+static __inline NDIS_STATUS
+MapCtKeyTupleToNl(PNL_BUFFER nlBuf,
+                  UINT16 tupleType,
+                  OVS_CT_KEY *key)
+{
+    NDIS_STATUS status = NDIS_STATUS_SUCCESS;
+    UINT32 offset = 0;
+
+    offset = NlMsgStartNested(nlBuf, tupleType);
+    if (!offset) {
+        return NDIS_STATUS_FAILURE;
+    }
+
+    status = MapIpTupleToNl(nlBuf, key);
+    if (status != NDIS_STATUS_SUCCESS) {
+        goto done;
+    }
+
+    status = MapProtoTupleToNl(nlBuf, key);
+    if (status != NDIS_STATUS_SUCCESS) {
+        goto done;
+    }
+
+done:
+    NlMsgEndNested(nlBuf, offset);
+    return status;
+}
+
+static __inline NDIS_STATUS
+MapCtCounterToNl(PNL_BUFFER nlBuf,
+                 UINT16 counterType,
+                 OVS_CT_KEY *key)
+{
+    NDIS_STATUS status = NDIS_STATUS_SUCCESS;
+    UINT32 offset = 0;
+
+    offset = NlMsgStartNested(nlBuf, counterType);
+    if (!offset) {
+        return NDIS_STATUS_FAILURE;
+    }
+
+    if (!NlMsgPutTailU64(nlBuf, CTA_COUNTERS_PACKETS,
+                         htonll(key->packetCount))) {
+        status = NDIS_STATUS_FAILURE;
+        goto done;
+    }
+
+    if (!NlMsgPutTailU64(nlBuf, CTA_COUNTERS_BYTES,
+                         htonll(key->byteCount))) {
+        status = NDIS_STATUS_FAILURE;
+        goto done;
+    }
+
+done:
+    NlMsgEndNested(nlBuf, offset);
+    return status;
+}
+
+/* Userspace expects system time to be Unix timestamp in Nano Seconds */
+static __inline unsigned
+WindowsTickToUnixSeconds(long long windowsTicks)
+{
+    /*
+        Windows epoch starts 1601-01-01T00:00:00Z. It's 11644473600 seconds
+        before the UNIX/Linux epoch (1970-01-01T00:00:00Z). Windows ticks are
+        in 100 nanoseconds
+    */
+    return (unsigned)((windowsTicks / WINDOWS_TICK
+                        - SEC_TO_UNIX_EPOCH));
+}
+
+static NTSTATUS
+OvsCreateNlMsgFromCtEntry(POVS_CT_ENTRY entry,
+                          POVS_MESSAGE msgIn,
+                          PVOID outBuffer,
+                          UINT32 outBufLen,
+                          int dpIfIndex)
+{
+    NL_BUFFER nlBuf;
+    BOOLEAN ok;
+    PNL_MSG_HDR nlMsg;
+    UINT32 timeout;
+    NDIS_STATUS status;
+    UINT64 currentTime, expiration;
+    NdisGetCurrentSystemTime((LARGE_INTEGER *)&currentTime);
+    UINT8 nfgenFamily = 0;
+    if (entry->key.dl_type == htons(ETH_TYPE_IPV4)) {
+        nfgenFamily = AF_INET;
+    } else if (entry->key.dl_type == htons(ETH_TYPE_IPV6)) {
+        nfgenFamily = AF_INET6;
+    }
+
+    NlBufInit(&nlBuf, outBuffer, outBufLen);
+    /* Mimic netfilter */
+    UINT16 nlmsgType = (NFNL_SUBSYS_CTNETLINK << 8 | IPCTNL_MSG_CT_NEW);
+    ok = NlFillOvsMsgForNfGenMsg(&nlBuf, nlmsgType, NLM_F_CREATE,
+                                 msgIn->nlMsg.nlmsgSeq,
+                                 msgIn->nlMsg.nlmsgPid,
+                                 nfgenFamily,
+                                 msgIn->nfGenMsg.version,
+                                 dpIfIndex);
+    if (!ok) {
+        return STATUS_INVALID_BUFFER_SIZE;
+    }
+
+    status = MapCtKeyTupleToNl(&nlBuf, CTA_TUPLE_ORIG, &entry->key);
+    if (status != NDIS_STATUS_SUCCESS) {
+        return STATUS_UNSUCCESSFUL;
+    }
+
+    status = MapCtKeyTupleToNl(&nlBuf, CTA_TUPLE_REPLY, &entry->rev_key);
+    if (status != NDIS_STATUS_SUCCESS) {
+        return STATUS_UNSUCCESSFUL;
+    }
+
+    status = MapCtCounterToNl(&nlBuf, CTA_COUNTERS_ORIG, &entry->key);
+    if (status != NDIS_STATUS_SUCCESS) {
+        return STATUS_UNSUCCESSFUL;
+    }
+
+    status = MapCtCounterToNl(&nlBuf, CTA_COUNTERS_REPLY, &entry->rev_key);
+    if (status != NDIS_STATUS_SUCCESS) {
+        return STATUS_UNSUCCESSFUL;
+    }
+
+    if (entry->key.zone) {
+        if (!NlMsgPutTailU16(&nlBuf, CTA_ZONE, htons(entry->key.zone))) {
+            return STATUS_INVALID_BUFFER_SIZE;
+        }
+    }
+
+    if (entry->mark) {
+        if (!NlMsgPutTailU32(&nlBuf, CTA_MARK, htonl(entry->mark))) {
+            return STATUS_INVALID_BUFFER_SIZE;
+        }
+    }
+
+    if (entry->labels.ct_labels) {
+        ok = NlMsgPutTailUnspec(&nlBuf, CTA_LABELS,
+                                (PCHAR)(&entry->labels),
+                                sizeof(entry->labels));
+        if (!ok) {
+            return STATUS_INVALID_BUFFER_SIZE;
+        }
+    }
+
+    expiration = entry->expiration - currentTime;
+    timeout = (UINT32) ((expiration > 0) ? expiration / CT_INTERVAL_SEC: 0);
+    if (timeout > 0) {
+        if (!NlMsgPutTailU32(&nlBuf, CTA_TIMEOUT, htonl(timeout))) {
+            return STATUS_INVALID_BUFFER_SIZE;
+        }
+    }
+
+    if (entry->key.nw_proto == IPPROTO_TCP) {
+        /* Add ProtoInfo for TCP */
+        UINT32 offset;
+        offset = NlMsgStartNested(&nlBuf, CTA_PROTOINFO);
+        if (!offset) {
+            return NDIS_STATUS_FAILURE;
+        }
+
+        status = OvsCtMapTcpProtoInfoToNl(&nlBuf, entry);
+        if (status != NDIS_STATUS_SUCCESS) {
+            return STATUS_UNSUCCESSFUL;
+        }
+
+        NlMsgEndNested(&nlBuf, offset);
+    }
+
+    /* CTA_STATUS is required but not implemented. Default to 0 */
+    if (!NlMsgPutTailU32(&nlBuf, CTA_STATUS, 0)) {
+        return STATUS_INVALID_BUFFER_SIZE;
+    }
+
+    /* Mimic netfilter - nf_conntrack_netlink.c:
+        static inline int ctnetlink_dump_id(struct sk_buff *skb, const struct nf_conn *ct) {
+            NLA_PUT_BE32(skb, CTA_ID, htonl((unsigned long)ct));
+            return 0;
+        }
+    */
+    if(!NlMsgPutTailU32(&nlBuf, CTA_ID, htonl((UINT32) entry))) {
+        return STATUS_INVALID_BUFFER_SIZE;
+    }
+
+    if (entry->timestampStart) {
+        UINT32 offset;
+        offset = NlMsgStartNested(&nlBuf, CTA_TIMESTAMP);
+        if (!offset) {
+            return NDIS_STATUS_FAILURE;
+        }
+        UINT64 start;
+        start = WindowsTickToUnixSeconds(entry->timestampStart);
+        start = start * SEC_TO_NANOSEC;
+        if (!NlMsgPutTailU64(&nlBuf, CTA_TIMESTAMP_START, htonll(start))) {
+            return STATUS_INVALID_BUFFER_SIZE;
+        }
+
+        NlMsgEndNested(&nlBuf, offset);
+    }
+
+    nlMsg = (PNL_MSG_HDR)NlBufAt(&nlBuf, 0, 0);
+    nlMsg->nlmsgLen = NlBufSize(&nlBuf);
+
+    return STATUS_SUCCESS;
+}
+
+/*
+ *----------------------------------------------------------------------------
+ *  OvsCtDumpCmdHandler --
+ *    Handler for IPCTNL_MSG_CT_GET command.
+ *
+ *  XXX - Try to consolidate dump handler patterns around dumpState usage
+ *        The following dumpHandler is similar to one vport.c uses
+ *----------------------------------------------------------------------------
+*/
+NTSTATUS
+OvsCtDumpCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
+                    UINT32 *replyLen)
+{
+    NTSTATUS rc;
+    /* Setup Dump Start if it's OVS_WRITE_DEV_OP and return */
+    if (usrParamsCtx->devOp == OVS_WRITE_DEV_OP) {
+        *replyLen = 0;
+        OvsSetupDumpStart(usrParamsCtx);
+        return STATUS_SUCCESS;
+    }
+
+    POVS_OPEN_INSTANCE instance =
+        (POVS_OPEN_INSTANCE)usrParamsCtx->ovsInstance;
+    POVS_MESSAGE msgIn;
+    UINT32 i;
+
+    ASSERT(usrParamsCtx->devOp == OVS_READ_DEV_OP);
+    if (instance->dumpState.ovsMsg == NULL) {
+        ASSERT(FALSE);
+        return STATUS_INVALID_DEVICE_STATE;
+    }
+
+    /* Output buffer has been validated while validating read dev op. */
+    ASSERT(usrParamsCtx->outputBuffer != NULL);
+    msgIn = instance->dumpState.ovsMsg;
+    UINT32 inBucket = instance->dumpState.index[0];
+    UINT32 inIndex = instance->dumpState.index[1];
+    UINT32 outIndex = 0;
+
+    LOCK_STATE_EX lockState;
+    NdisAcquireRWLockRead(ovsConntrackLockObj, &lockState, 0);
+
+    for (i = inBucket; i < CT_HASH_TABLE_SIZE; i++) {
+        PLIST_ENTRY head, link;
+        head = &ovsConntrackTable[i];
+        POVS_CT_ENTRY entry = NULL;
+
+        outIndex = 0;
+        LIST_FORALL(head, link) {
+            /*
+                * if one or more dumps were previously done on this same bucket,
+                * inIndex will be > 0, so we'll need to reply with the
+                * inIndex + 1 ct-entry from the bucket.
+            */
+            if (outIndex >= inIndex) {
+                entry = CONTAINING_RECORD(link, OVS_CT_ENTRY, link);
+
+                rc = OvsCreateNlMsgFromCtEntry(entry, msgIn,
+                                               usrParamsCtx->outputBuffer,
+                                               usrParamsCtx->outputLength,
+                                               gOvsSwitchContext->dpNo);
+
+                if (rc != NDIS_STATUS_SUCCESS) {
+                    NdisReleaseRWLock(ovsConntrackLockObj, &lockState);
+                    return STATUS_UNSUCCESSFUL;
+                }
+
+                ++outIndex;
+                break;
+            }
+
+            ++outIndex;
+        }
+
+        if (entry) {
+            break;
+        }
+
+        /*
+            * if no ct-entry was found above, check the next bucket, beginning
+            * with the first (i.e. index 0) elem from within that bucket
+        */
+        inIndex = 0;
+    }
+
+    instance->dumpState.index[0] = i;
+    instance->dumpState.index[1] = outIndex;
+    NdisReleaseRWLock(ovsConntrackLockObj, &lockState);
+
+    /* if i < CT_HASH_TABLE_SIZE => entry was found */
+    if (i < CT_HASH_TABLE_SIZE) {
+        POVS_MESSAGE msgOut = (POVS_MESSAGE)usrParamsCtx->outputBuffer;
+        *replyLen = msgOut->nlMsg.nlmsgLen;
+    } else {
+        /* if i >= CT_HASH_TABLE_SIZE => entry was not found => dump done */
+        *replyLen = 0;
+        FreeUserDumpState(instance);
+    }
+
+    return STATUS_SUCCESS;
+}
diff --git a/datapath-windows/ovsext/Conntrack.h b/datapath-windows/ovsext/Conntrack.h
index 883ac57..6d573c8 100644
--- a/datapath-windows/ovsext/Conntrack.h
+++ b/datapath-windows/ovsext/Conntrack.h
@@ -59,6 +59,8 @@  typedef struct _OVS_CT_KEY {
     UINT16 dl_type;
     UINT8 nw_proto;
     UINT16 zone;
+    UINT64 packetCount;
+    UINT64 byteCount;
 } OVS_CT_KEY, *POVS_CT_KEY;
 
 typedef struct OVS_CT_ENTRY {
@@ -67,6 +69,7 @@  typedef struct OVS_CT_ENTRY {
     UINT64      expiration;
     LIST_ENTRY  link;
     UINT32      mark;
+    UINT64      timestampStart;
     struct ovs_key_ct_labels labels;
 } OVS_CT_ENTRY, *POVS_CT_ENTRY;
 
@@ -102,6 +105,8 @@  BOOLEAN OvsConntrackValidateTcpPacket(const TCPHdr *tcp);
 OVS_CT_ENTRY * OvsConntrackCreateTcpEntry(const TCPHdr *tcp,
                                           PNET_BUFFER_LIST nbl,
                                           UINT64 now);
+NDIS_STATUS OvsCtMapTcpProtoInfoToNl(PNL_BUFFER nlBuf,
+                                     OVS_CT_ENTRY *conn_);
 OVS_CT_ENTRY * OvsConntrackCreateOtherEntry(UINT64 now);
 enum CT_UPDATE_RES OvsConntrackUpdateTcpEntry(OVS_CT_ENTRY* conn_,
                                               const TCPHdr *tcp,
diff --git a/datapath-windows/ovsext/Util.h b/datapath-windows/ovsext/Util.h
index 4bcde3b..aa54322 100644
--- a/datapath-windows/ovsext/Util.h
+++ b/datapath-windows/ovsext/Util.h
@@ -79,6 +79,10 @@  VOID OvsAppendList(PLIST_ENTRY dst, PLIST_ENTRY src);
 #define ntohs(_x)    _byteswap_ushort((USHORT)(_x))
 #define htonl(_x)    _byteswap_ulong((ULONG)(_x))
 #define ntohl(_x)    _byteswap_ulong((ULONG)(_x))
+#define htonll(_x)    ((1==htonl(1)) ? (_x) : \
+                           ((uint64_t) htonl(_x) << 32) | htonl(_x >> 32))
+#define ntohll(_x)    ((1==ntohl(1)) ? (_x) : \
+                           ((uint64_t) ntohl(_x) << 32) | ntohl(_x >> 32))
 #endif
 
 #define OVS_INIT_OBJECT_HEADER(_obj, _type, _revision, _size) \