diff mbox series

[ovs-dev,v4,2/4] datapath-windows: Remove NAT locks in conntrack.

Message ID 20180618053729.2372-3-kumaranand@vmware.com
State Superseded
Headers show
Series Optimize conntrack performance | expand

Commit Message

Anand Kumar June 18, 2018, 5:37 a.m. UTC
This patch primarily gets rid of NdisRWLock in conntrack for NAT
functionality along with some conntrack optimization. The subsequent
patch will have a lock implementation inside NAT module.

- Introduce a new function OvsGetTcpHeader() to retrieve TCP header
  and payload length, to optimize for TCP traffic.
- Optimize conntrack look up.
- Remove 'bucketlockRef' member from conntrack entry structure.

Signed-off-by: Anand Kumar <kumaranand@vmware.com>
---
 datapath-windows/ovsext/Conntrack-ftp.c |   4 +-
 datapath-windows/ovsext/Conntrack-tcp.c |  15 ++---
 datapath-windows/ovsext/Conntrack.c     | 110 +++++++++++++-------------------
 datapath-windows/ovsext/Conntrack.h     |  36 +++++++----
 4 files changed, 74 insertions(+), 91 deletions(-)

Comments

Shashank Ram June 18, 2018, 6:54 p.m. UTC | #1
On 06/17/2018 10:37 PM, Anand Kumar wrote:
> This patch primarily gets rid of NdisRWLock in conntrack for NAT
> functionality along with some conntrack optimization. The subsequent
> patch will have a lock implementation inside NAT module.
>
> - Introduce a new function OvsGetTcpHeader() to retrieve TCP header
>    and payload length, to optimize for TCP traffic.
> - Optimize conntrack look up.
> - Remove 'bucketlockRef' member from conntrack entry structure.
>
> Signed-off-by: Anand Kumar <kumaranand@vmware.com>
> ---
>   datapath-windows/ovsext/Conntrack-ftp.c |   4 +-
>   datapath-windows/ovsext/Conntrack-tcp.c |  15 ++---
>   datapath-windows/ovsext/Conntrack.c     | 110 +++++++++++++-------------------
>   datapath-windows/ovsext/Conntrack.h     |  36 +++++++----
>   4 files changed, 74 insertions(+), 91 deletions(-)
>
> diff --git a/datapath-windows/ovsext/Conntrack-ftp.c b/datapath-windows/ovsext/Conntrack-ftp.c
> index 6830dfa..ce09a65 100644
> --- a/datapath-windows/ovsext/Conntrack-ftp.c
> +++ b/datapath-windows/ovsext/Conntrack-ftp.c
> @@ -129,14 +129,14 @@ OvsCtHandleFtp(PNET_BUFFER_LIST curNbl,
>       char temp[256] = { 0 };
>       char ftpMsg[256] = { 0 };
>   
> +    UINT32 len;
>       TCPHdr tcpStorage;
>       const TCPHdr *tcp;
> -    tcp = OvsGetTcp(curNbl, layers->l4Offset, &tcpStorage);
> +    tcp = OvsGetTcpHeader(curNbl, layers, &tcpStorage, &len);
>       if (!tcp) {
>           return NDIS_STATUS_INVALID_PACKET;
>       }
>   
> -    UINT32 len = OvsGetTcpPayloadLength(curNbl);
>       if (len > sizeof(temp)) {
>           /* We only care up to 256 */
>           len = sizeof(temp);
> diff --git a/datapath-windows/ovsext/Conntrack-tcp.c b/datapath-windows/ovsext/Conntrack-tcp.c
> index 8cbab24..eda42ac 100644
> --- a/datapath-windows/ovsext/Conntrack-tcp.c
> +++ b/datapath-windows/ovsext/Conntrack-tcp.c
> @@ -194,9 +194,9 @@ OvsCastConntrackEntryToTcpEntry(OVS_CT_ENTRY* conn)
>   enum CT_UPDATE_RES
>   OvsConntrackUpdateTcpEntry(OVS_CT_ENTRY* conn_,
>                              const TCPHdr *tcp,
> -                           PNET_BUFFER_LIST nbl,
>                              BOOLEAN reply,
> -                           UINT64 now)
> +                           UINT64 now,
> +                           UINT32 tcpPayloadLen)
>   {
>       struct conn_tcp *conn = OvsCastConntrackEntryToTcpEntry(conn_);
>       /* The peer that sent 'pkt' */
> @@ -207,7 +207,6 @@ OvsConntrackUpdateTcpEntry(OVS_CT_ENTRY* conn_,
>       UINT16 tcp_flags = ntohs(tcp->flags);
>       uint16_t win = ntohs(tcp->window);
>       uint32_t ack, end, seq, orig_seq;
> -    uint32_t p_len = OvsGetTcpPayloadLength(nbl);
>       int ackskew;
>   
>       if (OvsCtInvalidTcpFlags(tcp_flags)) {
> @@ -248,7 +247,7 @@ OvsConntrackUpdateTcpEntry(OVS_CT_ENTRY* conn_,
>   
>           ack = ntohl(tcp->ack_seq);
>   
> -        end = seq + p_len;
> +        end = seq + tcpPayloadLen;
>           if (tcp_flags & TCP_SYN) {
>               end++;
>               if (dst->wscale & CT_WSCALE_FLAG) {
> @@ -287,7 +286,7 @@ OvsConntrackUpdateTcpEntry(OVS_CT_ENTRY* conn_,
>   
>       } else {
>           ack = ntohl(tcp->ack_seq);
> -        end = seq + p_len;
> +        end = seq + tcpPayloadLen;
>           if (tcp_flags & TCP_SYN) {
>               end++;
>           }
> @@ -469,8 +468,8 @@ OvsConntrackValidateTcpPacket(const TCPHdr *tcp)
>   
>   OVS_CT_ENTRY *
>   OvsConntrackCreateTcpEntry(const TCPHdr *tcp,
> -                           PNET_BUFFER_LIST nbl,
> -                           UINT64 now)
> +                           UINT64 now,
> +                           UINT32 tcpPayloadLen)
>   {
>       struct conn_tcp* newconn;
>       struct tcp_peer *src, *dst;
> @@ -486,7 +485,7 @@ OvsConntrackCreateTcpEntry(const TCPHdr *tcp,
>       dst = &newconn->peer[1];
>   
>       src->seqlo = ntohl(tcp->seq);
> -    src->seqhi = src->seqlo + OvsGetTcpPayloadLength(nbl) + 1;
> +    src->seqhi = src->seqlo + tcpPayloadLen + 1;
>   
>       if (tcp->flags & TCP_SYN) {
>           src->seqhi++;
> diff --git a/datapath-windows/ovsext/Conntrack.c b/datapath-windows/ovsext/Conntrack.c
> index 7b54fba..2a85e57 100644
> --- a/datapath-windows/ovsext/Conntrack.c
> +++ b/datapath-windows/ovsext/Conntrack.c
> @@ -32,7 +32,6 @@ KSTART_ROUTINE OvsConntrackEntryCleaner;
>   static PLIST_ENTRY ovsConntrackTable;
>   static OVS_CT_THREAD_CTX ctThreadCtx;
>   static PNDIS_RW_LOCK_EX *ovsCtBucketLock = NULL;
> -static PNDIS_RW_LOCK_EX ovsCtNatLockObj;
>   extern POVS_SWITCH_CONTEXT gOvsSwitchContext;
>   static ULONG ctTotalEntries;
>   
> @@ -54,19 +53,11 @@ OvsInitConntrack(POVS_SWITCH_CONTEXT context)
>       ctTotalEntries = 0;
>       UINT32 numBucketLocks = CT_HASH_TABLE_SIZE;
>   
> -    /* Init the sync-lock */
> -    ovsCtNatLockObj = NdisAllocateRWLock(context->NdisFilterHandle);
> -    if (ovsCtNatLockObj == NULL) {
> -        return STATUS_INSUFFICIENT_RESOURCES;
> -    }
> -
>       /* Init the Hash Buffer */
>       ovsConntrackTable = OvsAllocateMemoryWithTag(sizeof(LIST_ENTRY)
>                                                    * CT_HASH_TABLE_SIZE,
>                                                    OVS_CT_POOL_TAG);
>       if (ovsConntrackTable == NULL) {
> -        NdisFreeRWLock(ovsCtNatLockObj);
> -        ovsCtNatLockObj = NULL;
>           return STATUS_INSUFFICIENT_RESOURCES;
>       }
>   
> @@ -121,8 +112,6 @@ freeBucketLock:
>   freeTable:
>       OvsFreeMemoryWithTag(ovsConntrackTable, OVS_CT_POOL_TAG);
>       ovsConntrackTable = NULL;
> -    NdisFreeRWLock(ovsCtNatLockObj);
> -    ovsCtNatLockObj = NULL;
>       return status;
>   }
>   
> @@ -135,7 +124,6 @@ freeTable:
>   VOID
>   OvsCleanupConntrack(VOID)
>   {
> -    LOCK_STATE_EX lockStateNat;
>       ctThreadCtx.exit = 1;
>       KeSetEvent(&ctThreadCtx.event, 0, FALSE);
>       KeWaitForSingleObject(ctThreadCtx.threadObject, Executive,
> @@ -160,12 +148,7 @@ OvsCleanupConntrack(VOID)
>       }
>       OvsFreeMemoryWithTag(ovsCtBucketLock, OVS_CT_POOL_TAG);
>       ovsCtBucketLock = NULL;
> -
> -    NdisAcquireRWLockWrite(ovsCtNatLockObj, &lockStateNat, 0);
>       OvsNatCleanup();
> -    NdisReleaseRWLock(ovsCtNatLockObj, &lockStateNat);
> -    NdisFreeRWLock(ovsCtNatLockObj);
> -    ovsCtNatLockObj = NULL;
>   }
>   
>   static __inline VOID
> @@ -244,25 +227,20 @@ OvsCtAddEntry(POVS_CT_ENTRY entry,
>       if (natInfo == NULL) {
>           entry->natInfo.natAction = NAT_ACTION_NONE;
>       } else {
> -        LOCK_STATE_EX lockStateNat;
> -        NdisAcquireRWLockWrite(ovsCtNatLockObj, &lockStateNat, 0);
>           if (OvsIsForwardNat(natInfo->natAction)) {
>               entry->natInfo = *natInfo;
>               if (!OvsNatTranslateCtEntry(entry)) {
> -                NdisReleaseRWLock(ovsCtNatLockObj, &lockStateNat);
>                   return FALSE;
>               }
>               ctx->hash = OvsHashCtKey(&entry->key);
>           } else {
>               entry->natInfo.natAction = natInfo->natAction;
>           }
> -        NdisReleaseRWLock(ovsCtNatLockObj, &lockStateNat);
>       }
>   
>       entry->timestampStart = now;
>       NdisAllocateSpinLock(&(entry->lock));
>       UINT32 bucketIdx = ctx->hash & CT_HASH_TABLE_MASK;
> -    entry->bucketLockRef = ovsCtBucketLock[bucketIdx];
>       NdisAcquireRWLockWrite(ovsCtBucketLock[bucketIdx], &lockState, 0);
>       InsertHeadList(&ovsConntrackTable[bucketIdx],
>                      &entry->link);
> @@ -275,7 +253,7 @@ OvsCtAddEntry(POVS_CT_ENTRY entry,
>   static __inline POVS_CT_ENTRY
>   OvsCtEntryCreate(OvsForwardingContext *fwdCtx,
>                    UINT8 ipProto,
> -                 UINT32 l4Offset,
> +                 OVS_PACKET_HDR_INFO *layers,
>                    OvsConntrackKeyLookupCtx *ctx,
>                    OvsFlowKey *key,
>                    PNAT_ACTION_INFO natInfo,
> @@ -293,16 +271,18 @@ OvsCtEntryCreate(OvsForwardingContext *fwdCtx,
>       switch (ipProto) {
>       case IPPROTO_TCP:
>       {
> +        UINT32 tcpPayloadLen;
>           TCPHdr tcpStorage;
>           const TCPHdr *tcp;
> -        tcp = OvsGetTcp(curNbl, l4Offset, &tcpStorage);
> +        tcp = OvsGetTcpHeader(curNbl, layers, &tcpStorage, &tcpPayloadLen);
>           if (!OvsConntrackValidateTcpPacket(tcp)) {
>               state = OVS_CS_F_INVALID;
>               break;
>           }
>   
>           if (commit) {
> -            entry = OvsConntrackCreateTcpEntry(tcp, curNbl, currentTime);
> +            entry = OvsConntrackCreateTcpEntry(tcp, currentTime,
> +                                               tcpPayloadLen);
>           }
>           break;
>       }
> @@ -310,7 +290,7 @@ OvsCtEntryCreate(OvsForwardingContext *fwdCtx,
>       {
>           ICMPHdr storage;
>           const ICMPHdr *icmp;
> -        icmp = OvsGetIcmp(curNbl, l4Offset, &storage);
> +        icmp = OvsGetIcmp(curNbl, layers->l4Offset, &storage);
>           if (!OvsConntrackValidateIcmpPacket(icmp)) {
>               if(icmp) {
>                   OVS_LOG_TRACE("Invalid ICMP packet detected, icmp->type %u",
> @@ -371,7 +351,7 @@ static enum CT_UPDATE_RES
>   OvsCtUpdateEntry(OVS_CT_ENTRY* entry,
>                    PNET_BUFFER_LIST nbl,
>                    UINT8 ipProto,
> -                 UINT32 l4Offset,
> +                 OVS_PACKET_HDR_INFO *layers,
>                    BOOLEAN reply,
>                    UINT64 now)
>   {
> @@ -380,15 +360,17 @@ OvsCtUpdateEntry(OVS_CT_ENTRY* entry,
>       switch (ipProto) {
>       case IPPROTO_TCP:
>       {
> +        UINT32 tcpPayloadLen;
>           TCPHdr tcpStorage;
>           const TCPHdr *tcp;
> -        tcp = OvsGetTcp(nbl, l4Offset, &tcpStorage);
> +        tcp = OvsGetTcpHeader(nbl, layers, &tcpStorage, &tcpPayloadLen);
>           if (!tcp) {
>               status = CT_UPDATE_INVALID;
>               break;
>           }
>           OVS_ACQUIRE_SPIN_LOCK(&(entry->lock), irql);
> -        status = OvsConntrackUpdateTcpEntry(entry, tcp, nbl, reply, now);
> +        status = OvsConntrackUpdateTcpEntry(entry, tcp, reply, now,
> +                                            tcpPayloadLen);
>           OVS_RELEASE_SPIN_LOCK(&(entry->lock), irql);
>           break;
>       }
> @@ -433,10 +415,7 @@ OvsCtEntryDelete(POVS_CT_ENTRY entry, BOOLEAN forceDelete)
>       OVS_ACQUIRE_SPIN_LOCK(&(entry->lock), irql);
>       if (forceDelete || OvsCtEntryExpired(entry)) {
>           if (entry->natInfo.natAction) {
> -            LOCK_STATE_EX lockStateNat;
> -            NdisAcquireRWLockWrite(ovsCtNatLockObj, &lockStateNat, 0);
>               OvsNatDeleteKey(&entry->key);
> -            NdisReleaseRWLock(ovsCtNatLockObj, &lockStateNat);
>           }
>           OvsPostCtEventEntry(entry, OVS_EVENT_CT_DELETE);
>           RemoveEntryList(&entry->link);
> @@ -479,15 +458,12 @@ OvsDetectCtPacket(OvsForwardingContext *fwdCtx,
>   }
>   
>   BOOLEAN
> -OvsCtKeyAreSame(OVS_CT_KEY ctxKey, OVS_CT_KEY entryKey)
> +OvsCtEndpointsAreSame(OVS_CT_KEY ctxKey, OVS_CT_KEY entryKey)
>   {
>       return ((NdisEqualMemory(&ctxKey.src, &entryKey.src,
>                                sizeof(struct ct_endpoint))) &&
>               (NdisEqualMemory(&ctxKey.dst, &entryKey.dst,
> -                             sizeof(struct ct_endpoint))) &&
> -            (ctxKey.dl_type == entryKey.dl_type) &&
> -            (ctxKey.nw_proto == entryKey.nw_proto) &&
> -            (ctxKey.zone == entryKey.zone));
> +                             sizeof(struct ct_endpoint))));
>   }
>   
>   POVS_CT_ENTRY
> @@ -499,6 +475,11 @@ OvsCtLookup(OvsConntrackKeyLookupCtx *ctx)
>       POVS_CT_ENTRY found = NULL;
>       LOCK_STATE_EX lockStateTable;
>       UINT32 bucketIdx;
> +
> +    if (!ctTotalEntries) {
> +        return found;
> +    }
> +

The counter above needs protection to guard against change?
>       /* Reverse NAT must be performed before OvsCtLookup, so here
>        * we simply need to flip the src and dst in key and compare
>        * they are equal. Note that flipped key is not equal to
> @@ -507,10 +488,6 @@ OvsCtLookup(OvsConntrackKeyLookupCtx *ctx)
>       OVS_CT_KEY revCtxKey = ctx->key;
>       OvsCtKeyReverse(&revCtxKey);
>   
> -    if (!ctTotalEntries) {
> -        return found;
> -    }
> -
>       KIRQL irql = KeGetCurrentIrql();
>       bucketIdx = ctx->hash & CT_HASH_TABLE_MASK;
>       NdisAcquireRWLockRead(ovsCtBucketLock[bucketIdx], &lockStateTable, 0);
> @@ -518,12 +495,19 @@ OvsCtLookup(OvsConntrackKeyLookupCtx *ctx)
>           entry = CONTAINING_RECORD(link, OVS_CT_ENTRY, link);
>           OVS_ACQUIRE_SPIN_LOCK(&(entry->lock), irql);
>   
> -        if (OvsCtKeyAreSame(ctx->key, entry->key)) {
> +        if ((ctx->key.dl_type != entry->key.dl_type) ||
> +            (ctx->key.nw_proto != entry->key.nw_proto) ||
> +            (ctx->key.zone != entry->key.zone)) {
> +            OVS_RELEASE_SPIN_LOCK(&(entry->lock), irql);
> +            continue;
> +        }
> +
> +        if (OvsCtEndpointsAreSame(ctx->key, entry->key)) {
>               found = entry;
>               reply = FALSE;
>           }
>   
> -        if (!found && OvsCtKeyAreSame(revCtxKey, entry->key)) {
> +        if (!found && OvsCtEndpointsAreSame(revCtxKey, entry->key)) {
>               found = entry;
>               reply = TRUE;
>           }
> @@ -649,10 +633,7 @@ OvsCtSetupLookupCtx(OvsFlowKey *flowKey,
>           return NDIS_STATUS_INVALID_PACKET;
>       }
>   
> -    LOCK_STATE_EX lockStateNat;
> -    NdisAcquireRWLockRead(ovsCtNatLockObj, &lockStateNat, 0);
>       natEntry = OvsNatLookup(&ctx->key, TRUE);
> -    NdisReleaseRWLock(ovsCtNatLockObj, &lockStateNat);
>       if (natEntry) {
>           /* Translate address first for reverse NAT */
>           ctx->key = natEntry->ctEntry->key;
> @@ -678,7 +659,7 @@ OvsDetectFtpPacket(OvsFlowKey *key) {
>    */
>   static __inline POVS_CT_ENTRY
>   OvsProcessConntrackEntry(OvsForwardingContext *fwdCtx,
> -                         UINT32 l4Offset,
> +                         OVS_PACKET_HDR_INFO *layers,
>                            OvsConntrackKeyLookupCtx *ctx,
>                            OvsFlowKey *key,
>                            UINT16 zone,
> @@ -691,7 +672,7 @@ OvsProcessConntrackEntry(OvsForwardingContext *fwdCtx,
>       UINT32 state = 0;
>       PNET_BUFFER_LIST curNbl = fwdCtx->curNbl;
>       LOCK_STATE_EX lockStateTable;
> -    PNDIS_RW_LOCK_EX bucketLockRef = NULL;
> +
>       *entryCreated = FALSE;
>   
>       /* If an entry was found, update the state based on TCP flags */
> @@ -702,8 +683,9 @@ OvsProcessConntrackEntry(OvsForwardingContext *fwdCtx,
>           }
>       } else {
>           CT_UPDATE_RES result;
> -        result = OvsCtUpdateEntry(entry, curNbl, key->ipKey.nwProto,
> -                                  l4Offset, ctx->reply, currentTime);
> +        UINT32 bucketIdx;
> +        result = OvsCtUpdateEntry(entry, curNbl, key->ipKey.nwProto, layers,
> +                                  ctx->reply, currentTime);
>           switch (result) {
>           case CT_UPDATE_VALID:
>               state |= OVS_CS_F_ESTABLISHED;
> @@ -716,12 +698,12 @@ OvsProcessConntrackEntry(OvsForwardingContext *fwdCtx,
>               break;
>           case CT_UPDATE_NEW:
>               //Delete and update the Conntrack
> -            bucketLockRef = entry->bucketLockRef;
> -            NdisAcquireRWLockWrite(bucketLockRef, &lockStateTable, 0);
> +            bucketIdx = ctx->hash & CT_HASH_TABLE_MASK;
> +            NdisAcquireRWLockWrite(ovsCtBucketLock[bucketIdx], &lockStateTable, 0);
>               OvsCtEntryDelete(ctx->entry, TRUE);
> -            NdisReleaseRWLock(bucketLockRef, &lockStateTable);
> +            NdisReleaseRWLock(ovsCtBucketLock[bucketIdx], &lockStateTable);
>               ctx->entry = NULL;
> -            entry = OvsCtEntryCreate(fwdCtx, key->ipKey.nwProto, l4Offset,
> +            entry = OvsCtEntryCreate(fwdCtx, key->ipKey.nwProto, layers,
>                                        ctx, key, natInfo, commit, currentTime,
>                                        entryCreated);
>               if (!entry) {
> @@ -869,10 +851,10 @@ OvsCtExecute_(OvsForwardingContext *fwdCtx,
>   
>       /* Delete entry in reverse direction if 'force' is specified */
>       if (force && ctx.reply && entry) {
> -        PNDIS_RW_LOCK_EX bucketLockRef = entry->bucketLockRef;
> -        NdisAcquireRWLockWrite(bucketLockRef, &lockStateTable, 0);
> +        UINT32 bucketIdx = ctx.hash & CT_HASH_TABLE_MASK;
> +        NdisAcquireRWLockWrite(ovsCtBucketLock[bucketIdx], &lockStateTable, 0);
>           OvsCtEntryDelete(entry, TRUE);
> -        NdisReleaseRWLock(bucketLockRef, &lockStateTable);
> +        NdisReleaseRWLock(ovsCtBucketLock[bucketIdx], &lockStateTable);
>           entry = NULL;
>       }
>   
> @@ -885,7 +867,7 @@ OvsCtExecute_(OvsForwardingContext *fwdCtx,
>               OvsCtIncrementCounters(entry, ctx.reply, curNbl);
>           }
>           /* Process the entry and update CT flags */
> -        entry = OvsProcessConntrackEntry(fwdCtx, layers->l4Offset, &ctx, key,
> +        entry = OvsProcessConntrackEntry(fwdCtx, layers, &ctx, key,
>                                            zone, natInfo, commit, currentTime,
>                                            &entryCreated);
>   
> @@ -900,7 +882,7 @@ OvsCtExecute_(OvsForwardingContext *fwdCtx,
>           }
>           /* If no matching entry was found, create one and add New state */
>           entry = OvsCtEntryCreate(fwdCtx, key->ipKey.nwProto,
> -                                 layers->l4Offset, &ctx,
> +                                 layers, &ctx,
>                                    key, natInfo, commit, currentTime,
>                                    &entryCreated);
>       }
> @@ -918,13 +900,9 @@ OvsCtExecute_(OvsForwardingContext *fwdCtx,
>        */
>       KIRQL irql = KeGetCurrentIrql();
>       OVS_ACQUIRE_SPIN_LOCK(&(entry->lock), irql);
> -    if (natInfo->natAction != NAT_ACTION_NONE)
> -    {
> -        LOCK_STATE_EX lockStateNat;
> -        NdisAcquireRWLockWrite(ovsCtNatLockObj, &lockStateNat, 0);
> +    if (natInfo->natAction != NAT_ACTION_NONE) {
>           OvsNatPacket(fwdCtx, entry, entry->natInfo.natAction,
>                        key, ctx.reply);
> -        NdisReleaseRWLock(ovsCtNatLockObj, &lockStateNat);
>       }
>   
>       OvsCtSetMarkLabel(key, entry, mark, labels, &triggerUpdateEvent);
> @@ -1156,7 +1134,7 @@ OvsCtFlush(UINT16 zone, struct ovs_key_ct_tuple_ipv4 *tuple)
>   {
>       PLIST_ENTRY link, next;
>       POVS_CT_ENTRY entry;
> -    LOCK_STATE_EX lockState, lockStateNat;
> +    LOCK_STATE_EX lockState;
>   
>       if (ctTotalEntries) {
>           for (UINT32 i = 0; i < CT_HASH_TABLE_SIZE; i++) {
> @@ -1188,9 +1166,7 @@ OvsCtFlush(UINT16 zone, struct ovs_key_ct_tuple_ipv4 *tuple)
>           }
>       }
>   
> -    NdisAcquireRWLockWrite(ovsCtNatLockObj, &lockStateNat, 0);
>       OvsNatFlush(zone);
> -    NdisReleaseRWLock(ovsCtNatLockObj, &lockStateNat);
>       return NDIS_STATUS_SUCCESS;
>   }
>   
> diff --git a/datapath-windows/ovsext/Conntrack.h b/datapath-windows/ovsext/Conntrack.h
> index 7dc92a1..7a80eea 100644
> --- a/datapath-windows/ovsext/Conntrack.h
> +++ b/datapath-windows/ovsext/Conntrack.h
> @@ -99,8 +99,6 @@ typedef struct _NAT_ACTION_INFO {
>   } NAT_ACTION_INFO, *PNAT_ACTION_INFO;
>   
>   typedef struct OVS_CT_ENTRY {
> -    /* Reference to ovsCtBucketLock of ovsConntrackTable.*/
> -    PNDIS_RW_LOCK_EX bucketLockRef;
>       NDIS_SPIN_LOCK lock;       /* Protects OVS_CT_ENTRY. */
>       OVS_CT_KEY  key;
>       OVS_CT_KEY  rev_key;
> @@ -156,23 +154,33 @@ OvsConntrackUpdateExpiration(OVS_CT_ENTRY *ctEntry,
>       ctEntry->expiration = now + interval;
>   }
>   
> -static __inline UINT32
> -OvsGetTcpPayloadLength(PNET_BUFFER_LIST nbl)
> +static const TCPHdr*
> +OvsGetTcpHeader(PNET_BUFFER_LIST nbl,
> +                OVS_PACKET_HDR_INFO *layers,
> +                VOID *storage,
> +                UINT32 *tcpPayloadLen)
>   {
>       IPHdr *ipHdr;
>       TCPHdr *tcp;
> -    char *ipBuf[sizeof(EthHdr) + sizeof(IPHdr) + sizeof(TCPHdr)];
> +    VOID *dest = storage;
>   
> -    ipHdr = NdisGetDataBuffer(NET_BUFFER_LIST_FIRST_NB(nbl), sizeof *ipBuf,
> -                              (PVOID)&ipBuf, 1 /*no align*/, 0);
> +    ipHdr = NdisGetDataBuffer(NET_BUFFER_LIST_FIRST_NB(nbl),
> +                              layers->l4Offset + sizeof(TCPHdr),
> +                              NULL, 1 /*no align*/, 0);
>       if (ipHdr == NULL) {
> -        return 0;
> +        return NULL;
>       }
>   
> -    ipHdr = (IPHdr *)((PCHAR)ipHdr + sizeof(EthHdr));
> +    ipHdr = (IPHdr *)((PCHAR)ipHdr + layers->l3Offset);
>       tcp = (TCPHdr *)((PCHAR)ipHdr + ipHdr->ihl * 4);
> +    if (tcp->doff * 4 >= sizeof *tcp) {
> +        NdisMoveMemory(dest, tcp, sizeof(TCPHdr));
> +        *tcpPayloadLen = ntohs((ipHdr->tot_len) - (ipHdr->ihl * 4) -
> +                               (TCP_HDR_LEN(tcp)));
> +        return storage;
> +    }
>   
> -    return (ntohs(ipHdr->tot_len) - (ipHdr->ihl * 4) - (TCP_HDR_LEN(tcp)));
> +    return NULL;
>   }
>   
>   VOID OvsCleanupConntrack(VOID);
> @@ -184,17 +192,17 @@ NDIS_STATUS OvsExecuteConntrackAction(OvsForwardingContext *fwdCtx,
>   BOOLEAN OvsConntrackValidateTcpPacket(const TCPHdr *tcp);
>   BOOLEAN OvsConntrackValidateIcmpPacket(const ICMPHdr *icmp);
>   OVS_CT_ENTRY * OvsConntrackCreateTcpEntry(const TCPHdr *tcp,
> -                                          PNET_BUFFER_LIST nbl,
> -                                          UINT64 now);
> +                                          UINT64 now,
> +                                          UINT32 tcpPayloadLen);
>   NDIS_STATUS OvsCtMapTcpProtoInfoToNl(PNL_BUFFER nlBuf,
>                                        OVS_CT_ENTRY *conn_);
>   OVS_CT_ENTRY * OvsConntrackCreateOtherEntry(UINT64 now);
>   OVS_CT_ENTRY * OvsConntrackCreateIcmpEntry(UINT64 now);
>   enum CT_UPDATE_RES OvsConntrackUpdateTcpEntry(OVS_CT_ENTRY* conn_,
>                                                 const TCPHdr *tcp,
> -                                              PNET_BUFFER_LIST nbl,
>                                                 BOOLEAN reply,
> -                                              UINT64 now);
> +                                              UINT64 now,
> +                                              UINT32 tcpPayloadLen);
>   enum CT_UPDATE_RES OvsConntrackUpdateOtherEntry(OVS_CT_ENTRY *conn_,
>                                                   BOOLEAN reply,
>                                                   UINT64 now);
Anand Kumar June 18, 2018, 7:06 p.m. UTC | #2
Hi Shashank,

Thanks for the review. Please find my response inline.

Thanks,
Anand Kumar

On 6/18/18, 11:54 AM, "Shashank Ram" <rams@vmware.com> wrote:

    
    
    On 06/17/2018 10:37 PM, Anand Kumar wrote:
    > This patch primarily gets rid of NdisRWLock in conntrack for NAT
    > functionality along with some conntrack optimization. The subsequent
    > patch will have a lock implementation inside NAT module.
    >
    > - Introduce a new function OvsGetTcpHeader() to retrieve TCP header
    >    and payload length, to optimize for TCP traffic.
    > - Optimize conntrack look up.
    > - Remove 'bucketlockRef' member from conntrack entry structure.
    >
    > Signed-off-by: Anand Kumar <kumaranand@vmware.com>
    > ---
    >   datapath-windows/ovsext/Conntrack-ftp.c |   4 +-
    >   datapath-windows/ovsext/Conntrack-tcp.c |  15 ++---
    >   datapath-windows/ovsext/Conntrack.c     | 110 +++++++++++++-------------------
    >   datapath-windows/ovsext/Conntrack.h     |  36 +++++++----
    >   4 files changed, 74 insertions(+), 91 deletions(-)
    >
    > diff --git a/datapath-windows/ovsext/Conntrack-ftp.c b/datapath-windows/ovsext/Conntrack-ftp.c
    > index 6830dfa..ce09a65 100644
    > --- a/datapath-windows/ovsext/Conntrack-ftp.c
    > +++ b/datapath-windows/ovsext/Conntrack-ftp.c
    > @@ -129,14 +129,14 @@ OvsCtHandleFtp(PNET_BUFFER_LIST curNbl,
    >       char temp[256] = { 0 };
    >       char ftpMsg[256] = { 0 };
    >   
    > +    UINT32 len;
    >       TCPHdr tcpStorage;
    >       const TCPHdr *tcp;
    > -    tcp = OvsGetTcp(curNbl, layers->l4Offset, &tcpStorage);
    > +    tcp = OvsGetTcpHeader(curNbl, layers, &tcpStorage, &len);
    >       if (!tcp) {
    >           return NDIS_STATUS_INVALID_PACKET;
    >       }
    >   
    > -    UINT32 len = OvsGetTcpPayloadLength(curNbl);
    >       if (len > sizeof(temp)) {
    >           /* We only care up to 256 */
    >           len = sizeof(temp);
    > diff --git a/datapath-windows/ovsext/Conntrack-tcp.c b/datapath-windows/ovsext/Conntrack-tcp.c
    > index 8cbab24..eda42ac 100644
    > --- a/datapath-windows/ovsext/Conntrack-tcp.c
    > +++ b/datapath-windows/ovsext/Conntrack-tcp.c
    > @@ -194,9 +194,9 @@ OvsCastConntrackEntryToTcpEntry(OVS_CT_ENTRY* conn)
    >   enum CT_UPDATE_RES
    >   OvsConntrackUpdateTcpEntry(OVS_CT_ENTRY* conn_,
    >                              const TCPHdr *tcp,
    > -                           PNET_BUFFER_LIST nbl,
    >                              BOOLEAN reply,
    > -                           UINT64 now)
    > +                           UINT64 now,
    > +                           UINT32 tcpPayloadLen)
    >   {
    >       struct conn_tcp *conn = OvsCastConntrackEntryToTcpEntry(conn_);
    >       /* The peer that sent 'pkt' */
    > @@ -207,7 +207,6 @@ OvsConntrackUpdateTcpEntry(OVS_CT_ENTRY* conn_,
    >       UINT16 tcp_flags = ntohs(tcp->flags);
    >       uint16_t win = ntohs(tcp->window);
    >       uint32_t ack, end, seq, orig_seq;
    > -    uint32_t p_len = OvsGetTcpPayloadLength(nbl);
    >       int ackskew;
    >   
    >       if (OvsCtInvalidTcpFlags(tcp_flags)) {
    > @@ -248,7 +247,7 @@ OvsConntrackUpdateTcpEntry(OVS_CT_ENTRY* conn_,
    >   
    >           ack = ntohl(tcp->ack_seq);
    >   
    > -        end = seq + p_len;
    > +        end = seq + tcpPayloadLen;
    >           if (tcp_flags & TCP_SYN) {
    >               end++;
    >               if (dst->wscale & CT_WSCALE_FLAG) {
    > @@ -287,7 +286,7 @@ OvsConntrackUpdateTcpEntry(OVS_CT_ENTRY* conn_,
    >   
    >       } else {
    >           ack = ntohl(tcp->ack_seq);
    > -        end = seq + p_len;
    > +        end = seq + tcpPayloadLen;
    >           if (tcp_flags & TCP_SYN) {
    >               end++;
    >           }
    > @@ -469,8 +468,8 @@ OvsConntrackValidateTcpPacket(const TCPHdr *tcp)
    >   
    >   OVS_CT_ENTRY *
    >   OvsConntrackCreateTcpEntry(const TCPHdr *tcp,
    > -                           PNET_BUFFER_LIST nbl,
    > -                           UINT64 now)
    > +                           UINT64 now,
    > +                           UINT32 tcpPayloadLen)
    >   {
    >       struct conn_tcp* newconn;
    >       struct tcp_peer *src, *dst;
    > @@ -486,7 +485,7 @@ OvsConntrackCreateTcpEntry(const TCPHdr *tcp,
    >       dst = &newconn->peer[1];
    >   
    >       src->seqlo = ntohl(tcp->seq);
    > -    src->seqhi = src->seqlo + OvsGetTcpPayloadLength(nbl) + 1;
    > +    src->seqhi = src->seqlo + tcpPayloadLen + 1;
    >   
    >       if (tcp->flags & TCP_SYN) {
    >           src->seqhi++;
    > diff --git a/datapath-windows/ovsext/Conntrack.c b/datapath-windows/ovsext/Conntrack.c
    > index 7b54fba..2a85e57 100644
    > --- a/datapath-windows/ovsext/Conntrack.c
    > +++ b/datapath-windows/ovsext/Conntrack.c
    > @@ -32,7 +32,6 @@ KSTART_ROUTINE OvsConntrackEntryCleaner;
    >   static PLIST_ENTRY ovsConntrackTable;
    >   static OVS_CT_THREAD_CTX ctThreadCtx;
    >   static PNDIS_RW_LOCK_EX *ovsCtBucketLock = NULL;
    > -static PNDIS_RW_LOCK_EX ovsCtNatLockObj;
    >   extern POVS_SWITCH_CONTEXT gOvsSwitchContext;
    >   static ULONG ctTotalEntries;
    >   
    > @@ -54,19 +53,11 @@ OvsInitConntrack(POVS_SWITCH_CONTEXT context)
    >       ctTotalEntries = 0;
    >       UINT32 numBucketLocks = CT_HASH_TABLE_SIZE;
    >   
    > -    /* Init the sync-lock */
    > -    ovsCtNatLockObj = NdisAllocateRWLock(context->NdisFilterHandle);
    > -    if (ovsCtNatLockObj == NULL) {
    > -        return STATUS_INSUFFICIENT_RESOURCES;
    > -    }
    > -
    >       /* Init the Hash Buffer */
    >       ovsConntrackTable = OvsAllocateMemoryWithTag(sizeof(LIST_ENTRY)
    >                                                    * CT_HASH_TABLE_SIZE,
    >                                                    OVS_CT_POOL_TAG);
    >       if (ovsConntrackTable == NULL) {
    > -        NdisFreeRWLock(ovsCtNatLockObj);
    > -        ovsCtNatLockObj = NULL;
    >           return STATUS_INSUFFICIENT_RESOURCES;
    >       }
    >   
    > @@ -121,8 +112,6 @@ freeBucketLock:
    >   freeTable:
    >       OvsFreeMemoryWithTag(ovsConntrackTable, OVS_CT_POOL_TAG);
    >       ovsConntrackTable = NULL;
    > -    NdisFreeRWLock(ovsCtNatLockObj);
    > -    ovsCtNatLockObj = NULL;
    >       return status;
    >   }
    >   
    > @@ -135,7 +124,6 @@ freeTable:
    >   VOID
    >   OvsCleanupConntrack(VOID)
    >   {
    > -    LOCK_STATE_EX lockStateNat;
    >       ctThreadCtx.exit = 1;
    >       KeSetEvent(&ctThreadCtx.event, 0, FALSE);
    >       KeWaitForSingleObject(ctThreadCtx.threadObject, Executive,
    > @@ -160,12 +148,7 @@ OvsCleanupConntrack(VOID)
    >       }
    >       OvsFreeMemoryWithTag(ovsCtBucketLock, OVS_CT_POOL_TAG);
    >       ovsCtBucketLock = NULL;
    > -
    > -    NdisAcquireRWLockWrite(ovsCtNatLockObj, &lockStateNat, 0);
    >       OvsNatCleanup();
    > -    NdisReleaseRWLock(ovsCtNatLockObj, &lockStateNat);
    > -    NdisFreeRWLock(ovsCtNatLockObj);
    > -    ovsCtNatLockObj = NULL;
    >   }
    >   
    >   static __inline VOID
    > @@ -244,25 +227,20 @@ OvsCtAddEntry(POVS_CT_ENTRY entry,
    >       if (natInfo == NULL) {
    >           entry->natInfo.natAction = NAT_ACTION_NONE;
    >       } else {
    > -        LOCK_STATE_EX lockStateNat;
    > -        NdisAcquireRWLockWrite(ovsCtNatLockObj, &lockStateNat, 0);
    >           if (OvsIsForwardNat(natInfo->natAction)) {
    >               entry->natInfo = *natInfo;
    >               if (!OvsNatTranslateCtEntry(entry)) {
    > -                NdisReleaseRWLock(ovsCtNatLockObj, &lockStateNat);
    >                   return FALSE;
    >               }
    >               ctx->hash = OvsHashCtKey(&entry->key);
    >           } else {
    >               entry->natInfo.natAction = natInfo->natAction;
    >           }
    > -        NdisReleaseRWLock(ovsCtNatLockObj, &lockStateNat);
    >       }
    >   
    >       entry->timestampStart = now;
    >       NdisAllocateSpinLock(&(entry->lock));
    >       UINT32 bucketIdx = ctx->hash & CT_HASH_TABLE_MASK;
    > -    entry->bucketLockRef = ovsCtBucketLock[bucketIdx];
    >       NdisAcquireRWLockWrite(ovsCtBucketLock[bucketIdx], &lockState, 0);
    >       InsertHeadList(&ovsConntrackTable[bucketIdx],
    >                      &entry->link);
    > @@ -275,7 +253,7 @@ OvsCtAddEntry(POVS_CT_ENTRY entry,
    >   static __inline POVS_CT_ENTRY
    >   OvsCtEntryCreate(OvsForwardingContext *fwdCtx,
    >                    UINT8 ipProto,
    > -                 UINT32 l4Offset,
    > +                 OVS_PACKET_HDR_INFO *layers,
    >                    OvsConntrackKeyLookupCtx *ctx,
    >                    OvsFlowKey *key,
    >                    PNAT_ACTION_INFO natInfo,
    > @@ -293,16 +271,18 @@ OvsCtEntryCreate(OvsForwardingContext *fwdCtx,
    >       switch (ipProto) {
    >       case IPPROTO_TCP:
    >       {
    > +        UINT32 tcpPayloadLen;
    >           TCPHdr tcpStorage;
    >           const TCPHdr *tcp;
    > -        tcp = OvsGetTcp(curNbl, l4Offset, &tcpStorage);
    > +        tcp = OvsGetTcpHeader(curNbl, layers, &tcpStorage, &tcpPayloadLen);
    >           if (!OvsConntrackValidateTcpPacket(tcp)) {
    >               state = OVS_CS_F_INVALID;
    >               break;
    >           }
    >   
    >           if (commit) {
    > -            entry = OvsConntrackCreateTcpEntry(tcp, curNbl, currentTime);
    > +            entry = OvsConntrackCreateTcpEntry(tcp, currentTime,
    > +                                               tcpPayloadLen);
    >           }
    >           break;
    >       }
    > @@ -310,7 +290,7 @@ OvsCtEntryCreate(OvsForwardingContext *fwdCtx,
    >       {
    >           ICMPHdr storage;
    >           const ICMPHdr *icmp;
    > -        icmp = OvsGetIcmp(curNbl, l4Offset, &storage);
    > +        icmp = OvsGetIcmp(curNbl, layers->l4Offset, &storage);
    >           if (!OvsConntrackValidateIcmpPacket(icmp)) {
    >               if(icmp) {
    >                   OVS_LOG_TRACE("Invalid ICMP packet detected, icmp->type %u",
    > @@ -371,7 +351,7 @@ static enum CT_UPDATE_RES
    >   OvsCtUpdateEntry(OVS_CT_ENTRY* entry,
    >                    PNET_BUFFER_LIST nbl,
    >                    UINT8 ipProto,
    > -                 UINT32 l4Offset,
    > +                 OVS_PACKET_HDR_INFO *layers,
    >                    BOOLEAN reply,
    >                    UINT64 now)
    >   {
    > @@ -380,15 +360,17 @@ OvsCtUpdateEntry(OVS_CT_ENTRY* entry,
    >       switch (ipProto) {
    >       case IPPROTO_TCP:
    >       {
    > +        UINT32 tcpPayloadLen;
    >           TCPHdr tcpStorage;
    >           const TCPHdr *tcp;
    > -        tcp = OvsGetTcp(nbl, l4Offset, &tcpStorage);
    > +        tcp = OvsGetTcpHeader(nbl, layers, &tcpStorage, &tcpPayloadLen);
    >           if (!tcp) {
    >               status = CT_UPDATE_INVALID;
    >               break;
    >           }
    >           OVS_ACQUIRE_SPIN_LOCK(&(entry->lock), irql);
    > -        status = OvsConntrackUpdateTcpEntry(entry, tcp, nbl, reply, now);
    > +        status = OvsConntrackUpdateTcpEntry(entry, tcp, reply, now,
    > +                                            tcpPayloadLen);
    >           OVS_RELEASE_SPIN_LOCK(&(entry->lock), irql);
    >           break;
    >       }
    > @@ -433,10 +415,7 @@ OvsCtEntryDelete(POVS_CT_ENTRY entry, BOOLEAN forceDelete)
    >       OVS_ACQUIRE_SPIN_LOCK(&(entry->lock), irql);
    >       if (forceDelete || OvsCtEntryExpired(entry)) {
    >           if (entry->natInfo.natAction) {
    > -            LOCK_STATE_EX lockStateNat;
    > -            NdisAcquireRWLockWrite(ovsCtNatLockObj, &lockStateNat, 0);
    >               OvsNatDeleteKey(&entry->key);
    > -            NdisReleaseRWLock(ovsCtNatLockObj, &lockStateNat);
    >           }
    >           OvsPostCtEventEntry(entry, OVS_EVENT_CT_DELETE);
    >           RemoveEntryList(&entry->link);
    > @@ -479,15 +458,12 @@ OvsDetectCtPacket(OvsForwardingContext *fwdCtx,
    >   }
    >   
    >   BOOLEAN
    > -OvsCtKeyAreSame(OVS_CT_KEY ctxKey, OVS_CT_KEY entryKey)
    > +OvsCtEndpointsAreSame(OVS_CT_KEY ctxKey, OVS_CT_KEY entryKey)
    >   {
    >       return ((NdisEqualMemory(&ctxKey.src, &entryKey.src,
    >                                sizeof(struct ct_endpoint))) &&
    >               (NdisEqualMemory(&ctxKey.dst, &entryKey.dst,
    > -                             sizeof(struct ct_endpoint))) &&
    > -            (ctxKey.dl_type == entryKey.dl_type) &&
    > -            (ctxKey.nw_proto == entryKey.nw_proto) &&
    > -            (ctxKey.zone == entryKey.zone));
    > +                             sizeof(struct ct_endpoint))));
    >   }
    >   
    >   POVS_CT_ENTRY
    > @@ -499,6 +475,11 @@ OvsCtLookup(OvsConntrackKeyLookupCtx *ctx)
    >       POVS_CT_ENTRY found = NULL;
    >       LOCK_STATE_EX lockStateTable;
    >       UINT32 bucketIdx;
    > +
    > +    if (!ctTotalEntries) {
    > +        return found;
    > +    }
    > +
    
    The counter above needs protection to guard against change?
[AK]: Not needed, since we are not modifying the value. This is needed only to avoid lookup when there are no entries.

    >       /* Reverse NAT must be performed before OvsCtLookup, so here
    >        * we simply need to flip the src and dst in key and compare
    >        * they are equal. Note that flipped key is not equal to
    > @@ -507,10 +488,6 @@ OvsCtLookup(OvsConntrackKeyLookupCtx *ctx)
    >       OVS_CT_KEY revCtxKey = ctx->key;
    >       OvsCtKeyReverse(&revCtxKey);
    >   
    > -    if (!ctTotalEntries) {
    > -        return found;
    > -    }
    > -
    >       KIRQL irql = KeGetCurrentIrql();
    >       bucketIdx = ctx->hash & CT_HASH_TABLE_MASK;
    >       NdisAcquireRWLockRead(ovsCtBucketLock[bucketIdx], &lockStateTable, 0);
    > @@ -518,12 +495,19 @@ OvsCtLookup(OvsConntrackKeyLookupCtx *ctx)
    >           entry = CONTAINING_RECORD(link, OVS_CT_ENTRY, link);
    >           OVS_ACQUIRE_SPIN_LOCK(&(entry->lock), irql);
    >   
    > -        if (OvsCtKeyAreSame(ctx->key, entry->key)) {
    > +        if ((ctx->key.dl_type != entry->key.dl_type) ||
    > +            (ctx->key.nw_proto != entry->key.nw_proto) ||
    > +            (ctx->key.zone != entry->key.zone)) {
    > +            OVS_RELEASE_SPIN_LOCK(&(entry->lock), irql);
    > +            continue;
    > +        }
    > +
    > +        if (OvsCtEndpointsAreSame(ctx->key, entry->key)) {
    >               found = entry;
    >               reply = FALSE;
    >           }
    >   
    > -        if (!found && OvsCtKeyAreSame(revCtxKey, entry->key)) {
    > +        if (!found && OvsCtEndpointsAreSame(revCtxKey, entry->key)) {
    >               found = entry;
    >               reply = TRUE;
    >           }
    > @@ -649,10 +633,7 @@ OvsCtSetupLookupCtx(OvsFlowKey *flowKey,
    >           return NDIS_STATUS_INVALID_PACKET;
    >       }
    >   
    > -    LOCK_STATE_EX lockStateNat;
    > -    NdisAcquireRWLockRead(ovsCtNatLockObj, &lockStateNat, 0);
    >       natEntry = OvsNatLookup(&ctx->key, TRUE);
    > -    NdisReleaseRWLock(ovsCtNatLockObj, &lockStateNat);
    >       if (natEntry) {
    >           /* Translate address first for reverse NAT */
    >           ctx->key = natEntry->ctEntry->key;
    > @@ -678,7 +659,7 @@ OvsDetectFtpPacket(OvsFlowKey *key) {
    >    */
    >   static __inline POVS_CT_ENTRY
    >   OvsProcessConntrackEntry(OvsForwardingContext *fwdCtx,
    > -                         UINT32 l4Offset,
    > +                         OVS_PACKET_HDR_INFO *layers,
    >                            OvsConntrackKeyLookupCtx *ctx,
    >                            OvsFlowKey *key,
    >                            UINT16 zone,
    > @@ -691,7 +672,7 @@ OvsProcessConntrackEntry(OvsForwardingContext *fwdCtx,
    >       UINT32 state = 0;
    >       PNET_BUFFER_LIST curNbl = fwdCtx->curNbl;
    >       LOCK_STATE_EX lockStateTable;
    > -    PNDIS_RW_LOCK_EX bucketLockRef = NULL;
    > +
    >       *entryCreated = FALSE;
    >   
    >       /* If an entry was found, update the state based on TCP flags */
    > @@ -702,8 +683,9 @@ OvsProcessConntrackEntry(OvsForwardingContext *fwdCtx,
    >           }
    >       } else {
    >           CT_UPDATE_RES result;
    > -        result = OvsCtUpdateEntry(entry, curNbl, key->ipKey.nwProto,
    > -                                  l4Offset, ctx->reply, currentTime);
    > +        UINT32 bucketIdx;
    > +        result = OvsCtUpdateEntry(entry, curNbl, key->ipKey.nwProto, layers,
    > +                                  ctx->reply, currentTime);
    >           switch (result) {
    >           case CT_UPDATE_VALID:
    >               state |= OVS_CS_F_ESTABLISHED;
    > @@ -716,12 +698,12 @@ OvsProcessConntrackEntry(OvsForwardingContext *fwdCtx,
    >               break;
    >           case CT_UPDATE_NEW:
    >               //Delete and update the Conntrack
    > -            bucketLockRef = entry->bucketLockRef;
    > -            NdisAcquireRWLockWrite(bucketLockRef, &lockStateTable, 0);
    > +            bucketIdx = ctx->hash & CT_HASH_TABLE_MASK;
    > +            NdisAcquireRWLockWrite(ovsCtBucketLock[bucketIdx], &lockStateTable, 0);
    >               OvsCtEntryDelete(ctx->entry, TRUE);
    > -            NdisReleaseRWLock(bucketLockRef, &lockStateTable);
    > +            NdisReleaseRWLock(ovsCtBucketLock[bucketIdx], &lockStateTable);
    >               ctx->entry = NULL;
    > -            entry = OvsCtEntryCreate(fwdCtx, key->ipKey.nwProto, l4Offset,
    > +            entry = OvsCtEntryCreate(fwdCtx, key->ipKey.nwProto, layers,
    >                                        ctx, key, natInfo, commit, currentTime,
    >                                        entryCreated);
    >               if (!entry) {
    > @@ -869,10 +851,10 @@ OvsCtExecute_(OvsForwardingContext *fwdCtx,
    >   
    >       /* Delete entry in reverse direction if 'force' is specified */
    >       if (force && ctx.reply && entry) {
    > -        PNDIS_RW_LOCK_EX bucketLockRef = entry->bucketLockRef;
    > -        NdisAcquireRWLockWrite(bucketLockRef, &lockStateTable, 0);
    > +        UINT32 bucketIdx = ctx.hash & CT_HASH_TABLE_MASK;
    > +        NdisAcquireRWLockWrite(ovsCtBucketLock[bucketIdx], &lockStateTable, 0);
    >           OvsCtEntryDelete(entry, TRUE);
    > -        NdisReleaseRWLock(bucketLockRef, &lockStateTable);
    > +        NdisReleaseRWLock(ovsCtBucketLock[bucketIdx], &lockStateTable);
    >           entry = NULL;
    >       }
    >   
    > @@ -885,7 +867,7 @@ OvsCtExecute_(OvsForwardingContext *fwdCtx,
    >               OvsCtIncrementCounters(entry, ctx.reply, curNbl);
    >           }
    >           /* Process the entry and update CT flags */
    > -        entry = OvsProcessConntrackEntry(fwdCtx, layers->l4Offset, &ctx, key,
    > +        entry = OvsProcessConntrackEntry(fwdCtx, layers, &ctx, key,
    >                                            zone, natInfo, commit, currentTime,
    >                                            &entryCreated);
    >   
    > @@ -900,7 +882,7 @@ OvsCtExecute_(OvsForwardingContext *fwdCtx,
    >           }
    >           /* If no matching entry was found, create one and add New state */
    >           entry = OvsCtEntryCreate(fwdCtx, key->ipKey.nwProto,
    > -                                 layers->l4Offset, &ctx,
    > +                                 layers, &ctx,
    >                                    key, natInfo, commit, currentTime,
    >                                    &entryCreated);
    >       }
    > @@ -918,13 +900,9 @@ OvsCtExecute_(OvsForwardingContext *fwdCtx,
    >        */
    >       KIRQL irql = KeGetCurrentIrql();
    >       OVS_ACQUIRE_SPIN_LOCK(&(entry->lock), irql);
    > -    if (natInfo->natAction != NAT_ACTION_NONE)
    > -    {
    > -        LOCK_STATE_EX lockStateNat;
    > -        NdisAcquireRWLockWrite(ovsCtNatLockObj, &lockStateNat, 0);
    > +    if (natInfo->natAction != NAT_ACTION_NONE) {
    >           OvsNatPacket(fwdCtx, entry, entry->natInfo.natAction,
    >                        key, ctx.reply);
    > -        NdisReleaseRWLock(ovsCtNatLockObj, &lockStateNat);
    >       }
    >   
    >       OvsCtSetMarkLabel(key, entry, mark, labels, &triggerUpdateEvent);
    > @@ -1156,7 +1134,7 @@ OvsCtFlush(UINT16 zone, struct ovs_key_ct_tuple_ipv4 *tuple)
    >   {
    >       PLIST_ENTRY link, next;
    >       POVS_CT_ENTRY entry;
    > -    LOCK_STATE_EX lockState, lockStateNat;
    > +    LOCK_STATE_EX lockState;
    >   
    >       if (ctTotalEntries) {
    >           for (UINT32 i = 0; i < CT_HASH_TABLE_SIZE; i++) {
    > @@ -1188,9 +1166,7 @@ OvsCtFlush(UINT16 zone, struct ovs_key_ct_tuple_ipv4 *tuple)
    >           }
    >       }
    >   
    > -    NdisAcquireRWLockWrite(ovsCtNatLockObj, &lockStateNat, 0);
    >       OvsNatFlush(zone);
    > -    NdisReleaseRWLock(ovsCtNatLockObj, &lockStateNat);
    >       return NDIS_STATUS_SUCCESS;
    >   }
    >   
    > diff --git a/datapath-windows/ovsext/Conntrack.h b/datapath-windows/ovsext/Conntrack.h
    > index 7dc92a1..7a80eea 100644
    > --- a/datapath-windows/ovsext/Conntrack.h
    > +++ b/datapath-windows/ovsext/Conntrack.h
    > @@ -99,8 +99,6 @@ typedef struct _NAT_ACTION_INFO {
    >   } NAT_ACTION_INFO, *PNAT_ACTION_INFO;
    >   
    >   typedef struct OVS_CT_ENTRY {
    > -    /* Reference to ovsCtBucketLock of ovsConntrackTable.*/
    > -    PNDIS_RW_LOCK_EX bucketLockRef;
    >       NDIS_SPIN_LOCK lock;       /* Protects OVS_CT_ENTRY. */
    >       OVS_CT_KEY  key;
    >       OVS_CT_KEY  rev_key;
    > @@ -156,23 +154,33 @@ OvsConntrackUpdateExpiration(OVS_CT_ENTRY *ctEntry,
    >       ctEntry->expiration = now + interval;
    >   }
    >   
    > -static __inline UINT32
    > -OvsGetTcpPayloadLength(PNET_BUFFER_LIST nbl)
    > +static const TCPHdr*
    > +OvsGetTcpHeader(PNET_BUFFER_LIST nbl,
    > +                OVS_PACKET_HDR_INFO *layers,
    > +                VOID *storage,
    > +                UINT32 *tcpPayloadLen)
    >   {
    >       IPHdr *ipHdr;
    >       TCPHdr *tcp;
    > -    char *ipBuf[sizeof(EthHdr) + sizeof(IPHdr) + sizeof(TCPHdr)];
    > +    VOID *dest = storage;
    >   
    > -    ipHdr = NdisGetDataBuffer(NET_BUFFER_LIST_FIRST_NB(nbl), sizeof *ipBuf,
    > -                              (PVOID)&ipBuf, 1 /*no align*/, 0);
    > +    ipHdr = NdisGetDataBuffer(NET_BUFFER_LIST_FIRST_NB(nbl),
    > +                              layers->l4Offset + sizeof(TCPHdr),
    > +                              NULL, 1 /*no align*/, 0);
    >       if (ipHdr == NULL) {
    > -        return 0;
    > +        return NULL;
    >       }
    >   
    > -    ipHdr = (IPHdr *)((PCHAR)ipHdr + sizeof(EthHdr));
    > +    ipHdr = (IPHdr *)((PCHAR)ipHdr + layers->l3Offset);
    >       tcp = (TCPHdr *)((PCHAR)ipHdr + ipHdr->ihl * 4);
    > +    if (tcp->doff * 4 >= sizeof *tcp) {
    > +        NdisMoveMemory(dest, tcp, sizeof(TCPHdr));
    > +        *tcpPayloadLen = ntohs((ipHdr->tot_len) - (ipHdr->ihl * 4) -
    > +                               (TCP_HDR_LEN(tcp)));
    > +        return storage;
    > +    }
    >   
    > -    return (ntohs(ipHdr->tot_len) - (ipHdr->ihl * 4) - (TCP_HDR_LEN(tcp)));
    > +    return NULL;
    >   }
    >   
    >   VOID OvsCleanupConntrack(VOID);
    > @@ -184,17 +192,17 @@ NDIS_STATUS OvsExecuteConntrackAction(OvsForwardingContext *fwdCtx,
    >   BOOLEAN OvsConntrackValidateTcpPacket(const TCPHdr *tcp);
    >   BOOLEAN OvsConntrackValidateIcmpPacket(const ICMPHdr *icmp);
    >   OVS_CT_ENTRY * OvsConntrackCreateTcpEntry(const TCPHdr *tcp,
    > -                                          PNET_BUFFER_LIST nbl,
    > -                                          UINT64 now);
    > +                                          UINT64 now,
    > +                                          UINT32 tcpPayloadLen);
    >   NDIS_STATUS OvsCtMapTcpProtoInfoToNl(PNL_BUFFER nlBuf,
    >                                        OVS_CT_ENTRY *conn_);
    >   OVS_CT_ENTRY * OvsConntrackCreateOtherEntry(UINT64 now);
    >   OVS_CT_ENTRY * OvsConntrackCreateIcmpEntry(UINT64 now);
    >   enum CT_UPDATE_RES OvsConntrackUpdateTcpEntry(OVS_CT_ENTRY* conn_,
    >                                                 const TCPHdr *tcp,
    > -                                              PNET_BUFFER_LIST nbl,
    >                                                 BOOLEAN reply,
    > -                                              UINT64 now);
    > +                                              UINT64 now,
    > +                                              UINT32 tcpPayloadLen);
    >   enum CT_UPDATE_RES OvsConntrackUpdateOtherEntry(OVS_CT_ENTRY *conn_,
    >                                                   BOOLEAN reply,
    >                                                   UINT64 now);
diff mbox series

Patch

diff --git a/datapath-windows/ovsext/Conntrack-ftp.c b/datapath-windows/ovsext/Conntrack-ftp.c
index 6830dfa..ce09a65 100644
--- a/datapath-windows/ovsext/Conntrack-ftp.c
+++ b/datapath-windows/ovsext/Conntrack-ftp.c
@@ -129,14 +129,14 @@  OvsCtHandleFtp(PNET_BUFFER_LIST curNbl,
     char temp[256] = { 0 };
     char ftpMsg[256] = { 0 };
 
+    UINT32 len;
     TCPHdr tcpStorage;
     const TCPHdr *tcp;
-    tcp = OvsGetTcp(curNbl, layers->l4Offset, &tcpStorage);
+    tcp = OvsGetTcpHeader(curNbl, layers, &tcpStorage, &len);
     if (!tcp) {
         return NDIS_STATUS_INVALID_PACKET;
     }
 
-    UINT32 len = OvsGetTcpPayloadLength(curNbl);
     if (len > sizeof(temp)) {
         /* We only care up to 256 */
         len = sizeof(temp);
diff --git a/datapath-windows/ovsext/Conntrack-tcp.c b/datapath-windows/ovsext/Conntrack-tcp.c
index 8cbab24..eda42ac 100644
--- a/datapath-windows/ovsext/Conntrack-tcp.c
+++ b/datapath-windows/ovsext/Conntrack-tcp.c
@@ -194,9 +194,9 @@  OvsCastConntrackEntryToTcpEntry(OVS_CT_ENTRY* conn)
 enum CT_UPDATE_RES
 OvsConntrackUpdateTcpEntry(OVS_CT_ENTRY* conn_,
                            const TCPHdr *tcp,
-                           PNET_BUFFER_LIST nbl,
                            BOOLEAN reply,
-                           UINT64 now)
+                           UINT64 now,
+                           UINT32 tcpPayloadLen)
 {
     struct conn_tcp *conn = OvsCastConntrackEntryToTcpEntry(conn_);
     /* The peer that sent 'pkt' */
@@ -207,7 +207,6 @@  OvsConntrackUpdateTcpEntry(OVS_CT_ENTRY* conn_,
     UINT16 tcp_flags = ntohs(tcp->flags);
     uint16_t win = ntohs(tcp->window);
     uint32_t ack, end, seq, orig_seq;
-    uint32_t p_len = OvsGetTcpPayloadLength(nbl);
     int ackskew;
 
     if (OvsCtInvalidTcpFlags(tcp_flags)) {
@@ -248,7 +247,7 @@  OvsConntrackUpdateTcpEntry(OVS_CT_ENTRY* conn_,
 
         ack = ntohl(tcp->ack_seq);
 
-        end = seq + p_len;
+        end = seq + tcpPayloadLen;
         if (tcp_flags & TCP_SYN) {
             end++;
             if (dst->wscale & CT_WSCALE_FLAG) {
@@ -287,7 +286,7 @@  OvsConntrackUpdateTcpEntry(OVS_CT_ENTRY* conn_,
 
     } else {
         ack = ntohl(tcp->ack_seq);
-        end = seq + p_len;
+        end = seq + tcpPayloadLen;
         if (tcp_flags & TCP_SYN) {
             end++;
         }
@@ -469,8 +468,8 @@  OvsConntrackValidateTcpPacket(const TCPHdr *tcp)
 
 OVS_CT_ENTRY *
 OvsConntrackCreateTcpEntry(const TCPHdr *tcp,
-                           PNET_BUFFER_LIST nbl,
-                           UINT64 now)
+                           UINT64 now,
+                           UINT32 tcpPayloadLen)
 {
     struct conn_tcp* newconn;
     struct tcp_peer *src, *dst;
@@ -486,7 +485,7 @@  OvsConntrackCreateTcpEntry(const TCPHdr *tcp,
     dst = &newconn->peer[1];
 
     src->seqlo = ntohl(tcp->seq);
-    src->seqhi = src->seqlo + OvsGetTcpPayloadLength(nbl) + 1;
+    src->seqhi = src->seqlo + tcpPayloadLen + 1;
 
     if (tcp->flags & TCP_SYN) {
         src->seqhi++;
diff --git a/datapath-windows/ovsext/Conntrack.c b/datapath-windows/ovsext/Conntrack.c
index 7b54fba..2a85e57 100644
--- a/datapath-windows/ovsext/Conntrack.c
+++ b/datapath-windows/ovsext/Conntrack.c
@@ -32,7 +32,6 @@  KSTART_ROUTINE OvsConntrackEntryCleaner;
 static PLIST_ENTRY ovsConntrackTable;
 static OVS_CT_THREAD_CTX ctThreadCtx;
 static PNDIS_RW_LOCK_EX *ovsCtBucketLock = NULL;
-static PNDIS_RW_LOCK_EX ovsCtNatLockObj;
 extern POVS_SWITCH_CONTEXT gOvsSwitchContext;
 static ULONG ctTotalEntries;
 
@@ -54,19 +53,11 @@  OvsInitConntrack(POVS_SWITCH_CONTEXT context)
     ctTotalEntries = 0;
     UINT32 numBucketLocks = CT_HASH_TABLE_SIZE;
 
-    /* Init the sync-lock */
-    ovsCtNatLockObj = NdisAllocateRWLock(context->NdisFilterHandle);
-    if (ovsCtNatLockObj == NULL) {
-        return STATUS_INSUFFICIENT_RESOURCES;
-    }
-
     /* Init the Hash Buffer */
     ovsConntrackTable = OvsAllocateMemoryWithTag(sizeof(LIST_ENTRY)
                                                  * CT_HASH_TABLE_SIZE,
                                                  OVS_CT_POOL_TAG);
     if (ovsConntrackTable == NULL) {
-        NdisFreeRWLock(ovsCtNatLockObj);
-        ovsCtNatLockObj = NULL;
         return STATUS_INSUFFICIENT_RESOURCES;
     }
 
@@ -121,8 +112,6 @@  freeBucketLock:
 freeTable:
     OvsFreeMemoryWithTag(ovsConntrackTable, OVS_CT_POOL_TAG);
     ovsConntrackTable = NULL;
-    NdisFreeRWLock(ovsCtNatLockObj);
-    ovsCtNatLockObj = NULL;
     return status;
 }
 
@@ -135,7 +124,6 @@  freeTable:
 VOID
 OvsCleanupConntrack(VOID)
 {
-    LOCK_STATE_EX lockStateNat;
     ctThreadCtx.exit = 1;
     KeSetEvent(&ctThreadCtx.event, 0, FALSE);
     KeWaitForSingleObject(ctThreadCtx.threadObject, Executive,
@@ -160,12 +148,7 @@  OvsCleanupConntrack(VOID)
     }
     OvsFreeMemoryWithTag(ovsCtBucketLock, OVS_CT_POOL_TAG);
     ovsCtBucketLock = NULL;
-
-    NdisAcquireRWLockWrite(ovsCtNatLockObj, &lockStateNat, 0);
     OvsNatCleanup();
-    NdisReleaseRWLock(ovsCtNatLockObj, &lockStateNat);
-    NdisFreeRWLock(ovsCtNatLockObj);
-    ovsCtNatLockObj = NULL;
 }
 
 static __inline VOID
@@ -244,25 +227,20 @@  OvsCtAddEntry(POVS_CT_ENTRY entry,
     if (natInfo == NULL) {
         entry->natInfo.natAction = NAT_ACTION_NONE;
     } else {
-        LOCK_STATE_EX lockStateNat;
-        NdisAcquireRWLockWrite(ovsCtNatLockObj, &lockStateNat, 0);
         if (OvsIsForwardNat(natInfo->natAction)) {
             entry->natInfo = *natInfo;
             if (!OvsNatTranslateCtEntry(entry)) {
-                NdisReleaseRWLock(ovsCtNatLockObj, &lockStateNat);
                 return FALSE;
             }
             ctx->hash = OvsHashCtKey(&entry->key);
         } else {
             entry->natInfo.natAction = natInfo->natAction;
         }
-        NdisReleaseRWLock(ovsCtNatLockObj, &lockStateNat);
     }
 
     entry->timestampStart = now;
     NdisAllocateSpinLock(&(entry->lock));
     UINT32 bucketIdx = ctx->hash & CT_HASH_TABLE_MASK;
-    entry->bucketLockRef = ovsCtBucketLock[bucketIdx];
     NdisAcquireRWLockWrite(ovsCtBucketLock[bucketIdx], &lockState, 0);
     InsertHeadList(&ovsConntrackTable[bucketIdx],
                    &entry->link);
@@ -275,7 +253,7 @@  OvsCtAddEntry(POVS_CT_ENTRY entry,
 static __inline POVS_CT_ENTRY
 OvsCtEntryCreate(OvsForwardingContext *fwdCtx,
                  UINT8 ipProto,
-                 UINT32 l4Offset,
+                 OVS_PACKET_HDR_INFO *layers,
                  OvsConntrackKeyLookupCtx *ctx,
                  OvsFlowKey *key,
                  PNAT_ACTION_INFO natInfo,
@@ -293,16 +271,18 @@  OvsCtEntryCreate(OvsForwardingContext *fwdCtx,
     switch (ipProto) {
     case IPPROTO_TCP:
     {
+        UINT32 tcpPayloadLen;
         TCPHdr tcpStorage;
         const TCPHdr *tcp;
-        tcp = OvsGetTcp(curNbl, l4Offset, &tcpStorage);
+        tcp = OvsGetTcpHeader(curNbl, layers, &tcpStorage, &tcpPayloadLen);
         if (!OvsConntrackValidateTcpPacket(tcp)) {
             state = OVS_CS_F_INVALID;
             break;
         }
 
         if (commit) {
-            entry = OvsConntrackCreateTcpEntry(tcp, curNbl, currentTime);
+            entry = OvsConntrackCreateTcpEntry(tcp, currentTime,
+                                               tcpPayloadLen);
         }
         break;
     }
@@ -310,7 +290,7 @@  OvsCtEntryCreate(OvsForwardingContext *fwdCtx,
     {
         ICMPHdr storage;
         const ICMPHdr *icmp;
-        icmp = OvsGetIcmp(curNbl, l4Offset, &storage);
+        icmp = OvsGetIcmp(curNbl, layers->l4Offset, &storage);
         if (!OvsConntrackValidateIcmpPacket(icmp)) {
             if(icmp) {
                 OVS_LOG_TRACE("Invalid ICMP packet detected, icmp->type %u",
@@ -371,7 +351,7 @@  static enum CT_UPDATE_RES
 OvsCtUpdateEntry(OVS_CT_ENTRY* entry,
                  PNET_BUFFER_LIST nbl,
                  UINT8 ipProto,
-                 UINT32 l4Offset,
+                 OVS_PACKET_HDR_INFO *layers,
                  BOOLEAN reply,
                  UINT64 now)
 {
@@ -380,15 +360,17 @@  OvsCtUpdateEntry(OVS_CT_ENTRY* entry,
     switch (ipProto) {
     case IPPROTO_TCP:
     {
+        UINT32 tcpPayloadLen;
         TCPHdr tcpStorage;
         const TCPHdr *tcp;
-        tcp = OvsGetTcp(nbl, l4Offset, &tcpStorage);
+        tcp = OvsGetTcpHeader(nbl, layers, &tcpStorage, &tcpPayloadLen);
         if (!tcp) {
             status = CT_UPDATE_INVALID;
             break;
         }
         OVS_ACQUIRE_SPIN_LOCK(&(entry->lock), irql);
-        status = OvsConntrackUpdateTcpEntry(entry, tcp, nbl, reply, now);
+        status = OvsConntrackUpdateTcpEntry(entry, tcp, reply, now,
+                                            tcpPayloadLen);
         OVS_RELEASE_SPIN_LOCK(&(entry->lock), irql);
         break;
     }
@@ -433,10 +415,7 @@  OvsCtEntryDelete(POVS_CT_ENTRY entry, BOOLEAN forceDelete)
     OVS_ACQUIRE_SPIN_LOCK(&(entry->lock), irql);
     if (forceDelete || OvsCtEntryExpired(entry)) {
         if (entry->natInfo.natAction) {
-            LOCK_STATE_EX lockStateNat;
-            NdisAcquireRWLockWrite(ovsCtNatLockObj, &lockStateNat, 0);
             OvsNatDeleteKey(&entry->key);
-            NdisReleaseRWLock(ovsCtNatLockObj, &lockStateNat);
         }
         OvsPostCtEventEntry(entry, OVS_EVENT_CT_DELETE);
         RemoveEntryList(&entry->link);
@@ -479,15 +458,12 @@  OvsDetectCtPacket(OvsForwardingContext *fwdCtx,
 }
 
 BOOLEAN
-OvsCtKeyAreSame(OVS_CT_KEY ctxKey, OVS_CT_KEY entryKey)
+OvsCtEndpointsAreSame(OVS_CT_KEY ctxKey, OVS_CT_KEY entryKey)
 {
     return ((NdisEqualMemory(&ctxKey.src, &entryKey.src,
                              sizeof(struct ct_endpoint))) &&
             (NdisEqualMemory(&ctxKey.dst, &entryKey.dst,
-                             sizeof(struct ct_endpoint))) &&
-            (ctxKey.dl_type == entryKey.dl_type) &&
-            (ctxKey.nw_proto == entryKey.nw_proto) &&
-            (ctxKey.zone == entryKey.zone));
+                             sizeof(struct ct_endpoint))));
 }
 
 POVS_CT_ENTRY
@@ -499,6 +475,11 @@  OvsCtLookup(OvsConntrackKeyLookupCtx *ctx)
     POVS_CT_ENTRY found = NULL;
     LOCK_STATE_EX lockStateTable;
     UINT32 bucketIdx;
+
+    if (!ctTotalEntries) {
+        return found;
+    }
+
     /* Reverse NAT must be performed before OvsCtLookup, so here
      * we simply need to flip the src and dst in key and compare
      * they are equal. Note that flipped key is not equal to
@@ -507,10 +488,6 @@  OvsCtLookup(OvsConntrackKeyLookupCtx *ctx)
     OVS_CT_KEY revCtxKey = ctx->key;
     OvsCtKeyReverse(&revCtxKey);
 
-    if (!ctTotalEntries) {
-        return found;
-    }
-
     KIRQL irql = KeGetCurrentIrql();
     bucketIdx = ctx->hash & CT_HASH_TABLE_MASK;
     NdisAcquireRWLockRead(ovsCtBucketLock[bucketIdx], &lockStateTable, 0);
@@ -518,12 +495,19 @@  OvsCtLookup(OvsConntrackKeyLookupCtx *ctx)
         entry = CONTAINING_RECORD(link, OVS_CT_ENTRY, link);
         OVS_ACQUIRE_SPIN_LOCK(&(entry->lock), irql);
 
-        if (OvsCtKeyAreSame(ctx->key, entry->key)) {
+        if ((ctx->key.dl_type != entry->key.dl_type) ||
+            (ctx->key.nw_proto != entry->key.nw_proto) ||
+            (ctx->key.zone != entry->key.zone)) {
+            OVS_RELEASE_SPIN_LOCK(&(entry->lock), irql);
+            continue;
+        }
+
+        if (OvsCtEndpointsAreSame(ctx->key, entry->key)) {
             found = entry;
             reply = FALSE;
         }
 
-        if (!found && OvsCtKeyAreSame(revCtxKey, entry->key)) {
+        if (!found && OvsCtEndpointsAreSame(revCtxKey, entry->key)) {
             found = entry;
             reply = TRUE;
         }
@@ -649,10 +633,7 @@  OvsCtSetupLookupCtx(OvsFlowKey *flowKey,
         return NDIS_STATUS_INVALID_PACKET;
     }
 
-    LOCK_STATE_EX lockStateNat;
-    NdisAcquireRWLockRead(ovsCtNatLockObj, &lockStateNat, 0);
     natEntry = OvsNatLookup(&ctx->key, TRUE);
-    NdisReleaseRWLock(ovsCtNatLockObj, &lockStateNat);
     if (natEntry) {
         /* Translate address first for reverse NAT */
         ctx->key = natEntry->ctEntry->key;
@@ -678,7 +659,7 @@  OvsDetectFtpPacket(OvsFlowKey *key) {
  */
 static __inline POVS_CT_ENTRY
 OvsProcessConntrackEntry(OvsForwardingContext *fwdCtx,
-                         UINT32 l4Offset,
+                         OVS_PACKET_HDR_INFO *layers,
                          OvsConntrackKeyLookupCtx *ctx,
                          OvsFlowKey *key,
                          UINT16 zone,
@@ -691,7 +672,7 @@  OvsProcessConntrackEntry(OvsForwardingContext *fwdCtx,
     UINT32 state = 0;
     PNET_BUFFER_LIST curNbl = fwdCtx->curNbl;
     LOCK_STATE_EX lockStateTable;
-    PNDIS_RW_LOCK_EX bucketLockRef = NULL;
+
     *entryCreated = FALSE;
 
     /* If an entry was found, update the state based on TCP flags */
@@ -702,8 +683,9 @@  OvsProcessConntrackEntry(OvsForwardingContext *fwdCtx,
         }
     } else {
         CT_UPDATE_RES result;
-        result = OvsCtUpdateEntry(entry, curNbl, key->ipKey.nwProto,
-                                  l4Offset, ctx->reply, currentTime);
+        UINT32 bucketIdx;
+        result = OvsCtUpdateEntry(entry, curNbl, key->ipKey.nwProto, layers,
+                                  ctx->reply, currentTime);
         switch (result) {
         case CT_UPDATE_VALID:
             state |= OVS_CS_F_ESTABLISHED;
@@ -716,12 +698,12 @@  OvsProcessConntrackEntry(OvsForwardingContext *fwdCtx,
             break;
         case CT_UPDATE_NEW:
             //Delete and update the Conntrack
-            bucketLockRef = entry->bucketLockRef;
-            NdisAcquireRWLockWrite(bucketLockRef, &lockStateTable, 0);
+            bucketIdx = ctx->hash & CT_HASH_TABLE_MASK;
+            NdisAcquireRWLockWrite(ovsCtBucketLock[bucketIdx], &lockStateTable, 0);
             OvsCtEntryDelete(ctx->entry, TRUE);
-            NdisReleaseRWLock(bucketLockRef, &lockStateTable);
+            NdisReleaseRWLock(ovsCtBucketLock[bucketIdx], &lockStateTable);
             ctx->entry = NULL;
-            entry = OvsCtEntryCreate(fwdCtx, key->ipKey.nwProto, l4Offset,
+            entry = OvsCtEntryCreate(fwdCtx, key->ipKey.nwProto, layers,
                                      ctx, key, natInfo, commit, currentTime,
                                      entryCreated);
             if (!entry) {
@@ -869,10 +851,10 @@  OvsCtExecute_(OvsForwardingContext *fwdCtx,
 
     /* Delete entry in reverse direction if 'force' is specified */
     if (force && ctx.reply && entry) {
-        PNDIS_RW_LOCK_EX bucketLockRef = entry->bucketLockRef;
-        NdisAcquireRWLockWrite(bucketLockRef, &lockStateTable, 0);
+        UINT32 bucketIdx = ctx.hash & CT_HASH_TABLE_MASK;
+        NdisAcquireRWLockWrite(ovsCtBucketLock[bucketIdx], &lockStateTable, 0);
         OvsCtEntryDelete(entry, TRUE);
-        NdisReleaseRWLock(bucketLockRef, &lockStateTable);
+        NdisReleaseRWLock(ovsCtBucketLock[bucketIdx], &lockStateTable);
         entry = NULL;
     }
 
@@ -885,7 +867,7 @@  OvsCtExecute_(OvsForwardingContext *fwdCtx,
             OvsCtIncrementCounters(entry, ctx.reply, curNbl);
         }
         /* Process the entry and update CT flags */
-        entry = OvsProcessConntrackEntry(fwdCtx, layers->l4Offset, &ctx, key,
+        entry = OvsProcessConntrackEntry(fwdCtx, layers, &ctx, key,
                                          zone, natInfo, commit, currentTime,
                                          &entryCreated);
 
@@ -900,7 +882,7 @@  OvsCtExecute_(OvsForwardingContext *fwdCtx,
         }
         /* If no matching entry was found, create one and add New state */
         entry = OvsCtEntryCreate(fwdCtx, key->ipKey.nwProto,
-                                 layers->l4Offset, &ctx,
+                                 layers, &ctx,
                                  key, natInfo, commit, currentTime,
                                  &entryCreated);
     }
@@ -918,13 +900,9 @@  OvsCtExecute_(OvsForwardingContext *fwdCtx,
      */
     KIRQL irql = KeGetCurrentIrql();
     OVS_ACQUIRE_SPIN_LOCK(&(entry->lock), irql);
-    if (natInfo->natAction != NAT_ACTION_NONE)
-    {
-        LOCK_STATE_EX lockStateNat;
-        NdisAcquireRWLockWrite(ovsCtNatLockObj, &lockStateNat, 0);
+    if (natInfo->natAction != NAT_ACTION_NONE) {
         OvsNatPacket(fwdCtx, entry, entry->natInfo.natAction,
                      key, ctx.reply);
-        NdisReleaseRWLock(ovsCtNatLockObj, &lockStateNat);
     }
 
     OvsCtSetMarkLabel(key, entry, mark, labels, &triggerUpdateEvent);
@@ -1156,7 +1134,7 @@  OvsCtFlush(UINT16 zone, struct ovs_key_ct_tuple_ipv4 *tuple)
 {
     PLIST_ENTRY link, next;
     POVS_CT_ENTRY entry;
-    LOCK_STATE_EX lockState, lockStateNat;
+    LOCK_STATE_EX lockState;
 
     if (ctTotalEntries) {
         for (UINT32 i = 0; i < CT_HASH_TABLE_SIZE; i++) {
@@ -1188,9 +1166,7 @@  OvsCtFlush(UINT16 zone, struct ovs_key_ct_tuple_ipv4 *tuple)
         }
     }
 
-    NdisAcquireRWLockWrite(ovsCtNatLockObj, &lockStateNat, 0);
     OvsNatFlush(zone);
-    NdisReleaseRWLock(ovsCtNatLockObj, &lockStateNat);
     return NDIS_STATUS_SUCCESS;
 }
 
diff --git a/datapath-windows/ovsext/Conntrack.h b/datapath-windows/ovsext/Conntrack.h
index 7dc92a1..7a80eea 100644
--- a/datapath-windows/ovsext/Conntrack.h
+++ b/datapath-windows/ovsext/Conntrack.h
@@ -99,8 +99,6 @@  typedef struct _NAT_ACTION_INFO {
 } NAT_ACTION_INFO, *PNAT_ACTION_INFO;
 
 typedef struct OVS_CT_ENTRY {
-    /* Reference to ovsCtBucketLock of ovsConntrackTable.*/
-    PNDIS_RW_LOCK_EX bucketLockRef;
     NDIS_SPIN_LOCK lock;       /* Protects OVS_CT_ENTRY. */
     OVS_CT_KEY  key;
     OVS_CT_KEY  rev_key;
@@ -156,23 +154,33 @@  OvsConntrackUpdateExpiration(OVS_CT_ENTRY *ctEntry,
     ctEntry->expiration = now + interval;
 }
 
-static __inline UINT32
-OvsGetTcpPayloadLength(PNET_BUFFER_LIST nbl)
+static const TCPHdr*
+OvsGetTcpHeader(PNET_BUFFER_LIST nbl,
+                OVS_PACKET_HDR_INFO *layers,
+                VOID *storage,
+                UINT32 *tcpPayloadLen)
 {
     IPHdr *ipHdr;
     TCPHdr *tcp;
-    char *ipBuf[sizeof(EthHdr) + sizeof(IPHdr) + sizeof(TCPHdr)];
+    VOID *dest = storage;
 
-    ipHdr = NdisGetDataBuffer(NET_BUFFER_LIST_FIRST_NB(nbl), sizeof *ipBuf,
-                              (PVOID)&ipBuf, 1 /*no align*/, 0);
+    ipHdr = NdisGetDataBuffer(NET_BUFFER_LIST_FIRST_NB(nbl),
+                              layers->l4Offset + sizeof(TCPHdr),
+                              NULL, 1 /*no align*/, 0);
     if (ipHdr == NULL) {
-        return 0;
+        return NULL;
     }
 
-    ipHdr = (IPHdr *)((PCHAR)ipHdr + sizeof(EthHdr));
+    ipHdr = (IPHdr *)((PCHAR)ipHdr + layers->l3Offset);
     tcp = (TCPHdr *)((PCHAR)ipHdr + ipHdr->ihl * 4);
+    if (tcp->doff * 4 >= sizeof *tcp) {
+        NdisMoveMemory(dest, tcp, sizeof(TCPHdr));
+        *tcpPayloadLen = ntohs((ipHdr->tot_len) - (ipHdr->ihl * 4) -
+                               (TCP_HDR_LEN(tcp)));
+        return storage;
+    }
 
-    return (ntohs(ipHdr->tot_len) - (ipHdr->ihl * 4) - (TCP_HDR_LEN(tcp)));
+    return NULL;
 }
 
 VOID OvsCleanupConntrack(VOID);
@@ -184,17 +192,17 @@  NDIS_STATUS OvsExecuteConntrackAction(OvsForwardingContext *fwdCtx,
 BOOLEAN OvsConntrackValidateTcpPacket(const TCPHdr *tcp);
 BOOLEAN OvsConntrackValidateIcmpPacket(const ICMPHdr *icmp);
 OVS_CT_ENTRY * OvsConntrackCreateTcpEntry(const TCPHdr *tcp,
-                                          PNET_BUFFER_LIST nbl,
-                                          UINT64 now);
+                                          UINT64 now,
+                                          UINT32 tcpPayloadLen);
 NDIS_STATUS OvsCtMapTcpProtoInfoToNl(PNL_BUFFER nlBuf,
                                      OVS_CT_ENTRY *conn_);
 OVS_CT_ENTRY * OvsConntrackCreateOtherEntry(UINT64 now);
 OVS_CT_ENTRY * OvsConntrackCreateIcmpEntry(UINT64 now);
 enum CT_UPDATE_RES OvsConntrackUpdateTcpEntry(OVS_CT_ENTRY* conn_,
                                               const TCPHdr *tcp,
-                                              PNET_BUFFER_LIST nbl,
                                               BOOLEAN reply,
-                                              UINT64 now);
+                                              UINT64 now,
+                                              UINT32 tcpPayloadLen);
 enum CT_UPDATE_RES OvsConntrackUpdateOtherEntry(OVS_CT_ENTRY *conn_,
                                                 BOOLEAN reply,
                                                 UINT64 now);