diff mbox series

[ovs-dev,v4,1/4] datapath-windows: Use spinlock instead of RW lock for ct entry

Message ID 20180618053729.2372-2-kumaranand@vmware.com
State Superseded
Headers show
Series Optimize conntrack performance | expand

Commit Message

Anand Kumar June 18, 2018, 5:37 a.m. UTC
This patch mainly changes a ndis RW lock for conntrack entry to a
spinlock along with some minor refactor in conntrack. Using
spinlock instead of RW lock as RW locks causes performance hits
when acquired/released multiple times.

- Use NdisInterlockedXX wrapper api's instead of InterlockedXX.
- Update 'ctTotalRelatedEntries' using interlocked functions.
- Move conntrack lock out of NAT module.

Signed-off-by: Anand Kumar <kumaranand@vmware.com>
---
 datapath-windows/ovsext/Conntrack-nat.c     |   7 +-
 datapath-windows/ovsext/Conntrack-related.c |  17 ++--
 datapath-windows/ovsext/Conntrack.c         | 134 ++++++++++++++--------------
 datapath-windows/ovsext/Conntrack.h         |   2 +-
 datapath-windows/ovsext/Util.h              |  18 ++++
 5 files changed, 93 insertions(+), 85 deletions(-)

Comments

Shashank Ram June 18, 2018, 6:27 p.m. UTC | #1
On 06/17/2018 10:37 PM, Anand Kumar wrote:
> This patch mainly changes a ndis RW lock for conntrack entry to a
> spinlock along with some minor refactor in conntrack. Using
> spinlock instead of RW lock as RW locks causes performance hits
> when acquired/released multiple times.
>
> - Use NdisInterlockedXX wrapper api's instead of InterlockedXX.
> - Update 'ctTotalRelatedEntries' using interlocked functions.
> - Move conntrack lock out of NAT module.
>
> Signed-off-by: Anand Kumar <kumaranand@vmware.com>
> ---
>   datapath-windows/ovsext/Conntrack-nat.c     |   7 +-
>   datapath-windows/ovsext/Conntrack-related.c |  17 ++--
>   datapath-windows/ovsext/Conntrack.c         | 134 ++++++++++++++--------------
>   datapath-windows/ovsext/Conntrack.h         |   2 +-
>   datapath-windows/ovsext/Util.h              |  18 ++++
>   5 files changed, 93 insertions(+), 85 deletions(-)
>
> diff --git a/datapath-windows/ovsext/Conntrack-nat.c b/datapath-windows/ovsext/Conntrack-nat.c
> index 316c946..da1814f 100644
> --- a/datapath-windows/ovsext/Conntrack-nat.c
> +++ b/datapath-windows/ovsext/Conntrack-nat.c
> @@ -167,16 +167,13 @@ OvsNatPacket(OvsForwardingContext *ovsFwdCtx,
>   {
>       UINT32 natFlag;
>       const struct ct_endpoint* endpoint;
> -    LOCK_STATE_EX lockState;
> -    /* XXX: Move conntrack locks out of NAT after implementing lock in NAT. */
> -    NdisAcquireRWLockRead(entry->lock, &lockState, 0);
> +
>       /* When it is NAT, only entry->rev_key contains NATTED address;
>          When it is unNAT, only entry->key contains the UNNATTED address;*/
>       const OVS_CT_KEY *ctKey = reverse ? &entry->key : &entry->rev_key;
>       BOOLEAN isSrcNat;
>   
>       if (!(natAction & (NAT_ACTION_SRC | NAT_ACTION_DST))) {
> -        NdisReleaseRWLock(entry->lock, &lockState);
>           return;
>       }
>       isSrcNat = (((natAction & NAT_ACTION_SRC) && !reverse) ||
> @@ -206,7 +203,6 @@ OvsNatPacket(OvsForwardingContext *ovsFwdCtx,
>           }
>       } else if (ctKey->dl_type == htons(ETH_TYPE_IPV6)){
>           // XXX: IPv6 packet not supported yet.
> -        NdisReleaseRWLock(entry->lock, &lockState);
>           return;
>       }
>       if (natAction & (NAT_ACTION_SRC_PORT | NAT_ACTION_DST_PORT)) {
> @@ -220,7 +216,6 @@ OvsNatPacket(OvsForwardingContext *ovsFwdCtx,
>               }
>           }
>       }
> -    NdisReleaseRWLock(entry->lock, &lockState);
>   }
>   
>   
> diff --git a/datapath-windows/ovsext/Conntrack-related.c b/datapath-windows/ovsext/Conntrack-related.c
> index ec4b536..b798137 100644
> --- a/datapath-windows/ovsext/Conntrack-related.c
> +++ b/datapath-windows/ovsext/Conntrack-related.c
> @@ -18,7 +18,7 @@
>   #include "Jhash.h"
>   
>   static PLIST_ENTRY ovsCtRelatedTable; /* Holds related entries */
> -static UINT64 ctTotalRelatedEntries;
> +static ULONG ctTotalRelatedEntries;
>   static OVS_CT_THREAD_CTX ctRelThreadCtx;
>   static PNDIS_RW_LOCK_EX ovsCtRelatedLockObj;
>   extern POVS_SWITCH_CONTEXT gOvsSwitchContext;
> @@ -75,13 +75,11 @@ OvsCtRelatedLookup(OVS_CT_KEY key, UINT64 currentTime)
>       POVS_CT_REL_ENTRY entry;
>       LOCK_STATE_EX lockState;
>   
> -    NdisAcquireRWLockRead(ovsCtRelatedLockObj, &lockState, 0);
> -
>       if (!ctTotalRelatedEntries) {
> -        NdisReleaseRWLock(ovsCtRelatedLockObj, &lockState);
>           return NULL;
>       }
>   
> +    NdisAcquireRWLockRead(ovsCtRelatedLockObj, &lockState, 0);
>       for (int i = 0; i < CT_HASH_TABLE_SIZE; i++) {
>           /* XXX - Scan the table based on the hash instead */
>           LIST_FORALL_SAFE(&ovsCtRelatedTable[i], link, next) {
> @@ -103,7 +101,7 @@ OvsCtRelatedEntryDelete(POVS_CT_REL_ENTRY entry)
>   {
>       RemoveEntryList(&entry->link);
>       OvsFreeMemoryWithTag(entry, OVS_CT_POOL_TAG);
> -    ctTotalRelatedEntries--;
> +    NdisInterlockedDecrement((PLONG)&ctTotalRelatedEntries);
>   }
>   
>   NDIS_STATUS
> @@ -139,7 +137,7 @@ OvsCtRelatedEntryCreate(UINT8 ipProto,
>       NdisAcquireRWLockWrite(ovsCtRelatedLockObj, &lockState, 0);
>       InsertHeadList(&ovsCtRelatedTable[hash & CT_HASH_TABLE_MASK],
>                      &entry->link);
> -    ctTotalRelatedEntries++;
> +    NdisInterlockedIncrement((PLONG)&ctTotalRelatedEntries);
>       NdisReleaseRWLock(ovsCtRelatedLockObj, &lockState);
>   
>       return NDIS_STATUS_SUCCESS;
> @@ -150,11 +148,10 @@ OvsCtRelatedFlush()
>   {
>       PLIST_ENTRY link, next;
>       POVS_CT_REL_ENTRY entry;
> -
>       LOCK_STATE_EX lockState;
> -    NdisAcquireRWLockWrite(ovsCtRelatedLockObj, &lockState, 0);
>   
>       if (ctTotalRelatedEntries) {
> +        NdisAcquireRWLockWrite(ovsCtRelatedLockObj, &lockState, 0);
>           for (int i = 0; i < CT_HASH_TABLE_SIZE; i++) {
>               LIST_FORALL_SAFE(&ovsCtRelatedTable[i], link, next) {
>                   entry = CONTAINING_RECORD(link, OVS_CT_REL_ENTRY, link);
> @@ -189,9 +186,8 @@ OvsCtRelatedEntryCleaner(PVOID data)
>               /* Lock has been freed by 'OvsCleanupCtRelated()' */
>               break;
>           }
> -        NdisAcquireRWLockWrite(ovsCtRelatedLockObj, &lockState, 0);
> +
>           if (context->exit) {
> -            NdisReleaseRWLock(ovsCtRelatedLockObj, &lockState);
>               break;
>           }
>   
> @@ -201,6 +197,7 @@ OvsCtRelatedEntryCleaner(PVOID data)
>           threadSleepTimeout = currentTime + CT_CLEANUP_INTERVAL;
>   
>           if (ctTotalRelatedEntries) {
> +            NdisAcquireRWLockWrite(ovsCtRelatedLockObj, &lockState, 0);
>               for (int i = 0; i < CT_HASH_TABLE_SIZE; i++) {
>                   LIST_FORALL_SAFE(&ovsCtRelatedTable[i], link, next) {
>                       entry = CONTAINING_RECORD(link, OVS_CT_REL_ENTRY, link);
> diff --git a/datapath-windows/ovsext/Conntrack.c b/datapath-windows/ovsext/Conntrack.c
> index add1491..7b54fba 100644
> --- a/datapath-windows/ovsext/Conntrack.c
> +++ b/datapath-windows/ovsext/Conntrack.c
> @@ -34,7 +34,7 @@ static OVS_CT_THREAD_CTX ctThreadCtx;
>   static PNDIS_RW_LOCK_EX *ovsCtBucketLock = NULL;
>   static PNDIS_RW_LOCK_EX ovsCtNatLockObj;
>   extern POVS_SWITCH_CONTEXT gOvsSwitchContext;
> -static LONG ctTotalEntries;
> +static ULONG ctTotalEntries;
>   
>   static __inline OvsCtFlush(UINT16 zone, struct ovs_key_ct_tuple_ipv4 *tuple);
>   static __inline NDIS_STATUS
> @@ -208,7 +208,6 @@ OvsPostCtEventEntry(POVS_CT_ENTRY entry, UINT8 type)
>   {
>       OVS_CT_EVENT_ENTRY ctEventEntry = {0};
>       NdisMoveMemory(&ctEventEntry.entry, entry, sizeof(OVS_CT_ENTRY));
> -    ctEventEntry.entry.lock = NULL;
>       ctEventEntry.entry.parent = NULL;
>       ctEventEntry.type = type;
>       OvsPostCtEvent(&ctEventEntry);
> @@ -217,8 +216,8 @@ OvsPostCtEventEntry(POVS_CT_ENTRY entry, UINT8 type)
>   static __inline VOID
>   OvsCtIncrementCounters(POVS_CT_ENTRY entry, BOOLEAN reply, PNET_BUFFER_LIST nbl)
>   {
> -    LOCK_STATE_EX lockState;
> -    NdisAcquireRWLockWrite(entry->lock, &lockState, 0);
> +    KIRQL irql = KeGetCurrentIrql();
> +    OVS_ACQUIRE_SPIN_LOCK(&(entry->lock), irql);
>       if (reply) {
>           entry->rev_key.byteCount+= OvsPacketLenNBL(nbl);
>           entry->rev_key.packetCount++;
> @@ -226,11 +225,11 @@ OvsCtIncrementCounters(POVS_CT_ENTRY entry, BOOLEAN reply, PNET_BUFFER_LIST nbl)
>           entry->key.byteCount += OvsPacketLenNBL(nbl);
>           entry->key.packetCount++;
>       }
> -    NdisReleaseRWLock(entry->lock, &lockState);
> +    OVS_RELEASE_SPIN_LOCK(&(entry->lock), irql);
Can we simply use NdisInterlocked functions instead of having a lock to 
perform increments?
Also, in case the lock is only being acquired once within a function, 
there is no need to calculate the dispatch level, let NDIS do it. 
Calculating the dispatch level helps only in cases where the locks are 
being acquired multiple times within a given context.
>   }
>   
>   static __inline BOOLEAN
> -OvsCtAddEntry(POVS_SWITCH_CONTEXT switchContext, POVS_CT_ENTRY entry,
> +OvsCtAddEntry(POVS_CT_ENTRY entry,
>                 OvsConntrackKeyLookupCtx *ctx,
>                 PNAT_ACTION_INFO natInfo, UINT64 now)
>   {
> @@ -261,18 +260,14 @@ OvsCtAddEntry(POVS_SWITCH_CONTEXT switchContext, POVS_CT_ENTRY entry,
>       }
>   
>       entry->timestampStart = now;
> -    entry->lock = NdisAllocateRWLock(switchContext->NdisFilterHandle);
> -    if (entry->lock == NULL) {
> -        OVS_LOG_ERROR("NdisAllocateRWLock failed : Insufficient resources");
> -        return FALSE;
> -    }
> +    NdisAllocateSpinLock(&(entry->lock));
>       UINT32 bucketIdx = ctx->hash & CT_HASH_TABLE_MASK;
>       entry->bucketLockRef = ovsCtBucketLock[bucketIdx];
>       NdisAcquireRWLockWrite(ovsCtBucketLock[bucketIdx], &lockState, 0);
>       InsertHeadList(&ovsConntrackTable[bucketIdx],
>                      &entry->link);
>   
> -    InterlockedIncrement((LONG volatile *)&ctTotalEntries);
> +    NdisInterlockedIncrement((PLONG)&ctTotalEntries);
>       NdisReleaseRWLock(ovsCtBucketLock[bucketIdx], &lockState);
>       return TRUE;
>   }
> @@ -351,8 +346,7 @@ OvsCtEntryCreate(OvsForwardingContext *fwdCtx,
>       if (state != OVS_CS_F_INVALID && commit) {
>           if (entry) {
>               entry->parent = parentEntry;
> -            if (OvsCtAddEntry(fwdCtx->switchContext, entry, ctx,
> -                              natInfo, currentTime)) {
> +            if (OvsCtAddEntry(entry, ctx, natInfo, currentTime)) {
>                   *entryCreated = TRUE;
>               } else {
>                   /* Unable to add entry to the list */
> @@ -382,8 +376,7 @@ OvsCtUpdateEntry(OVS_CT_ENTRY* entry,
>                    UINT64 now)
>   {
>       CT_UPDATE_RES status;
> -    LOCK_STATE_EX lockState;
> -    NdisAcquireRWLockWrite(entry->lock, &lockState, 0);
> +    KIRQL irql = KeGetCurrentIrql();
>       switch (ipProto) {
>       case IPPROTO_TCP:
>       {
> @@ -394,20 +387,25 @@ OvsCtUpdateEntry(OVS_CT_ENTRY* entry,
>               status = CT_UPDATE_INVALID;
>               break;
>           }
> +        OVS_ACQUIRE_SPIN_LOCK(&(entry->lock), irql);
>           status = OvsConntrackUpdateTcpEntry(entry, tcp, nbl, reply, now);
> +        OVS_RELEASE_SPIN_LOCK(&(entry->lock), irql);
>           break;
>       }
Could you fix the style here for consistency by removing the braces for 
the case above.
>       case IPPROTO_ICMP:
> +        OVS_ACQUIRE_SPIN_LOCK(&(entry->lock), irql);
>           status = OvsConntrackUpdateIcmpEntry(entry, reply, now);
> +        OVS_RELEASE_SPIN_LOCK(&(entry->lock), irql);
>           break;
>       case IPPROTO_UDP:
> +        OVS_ACQUIRE_SPIN_LOCK(&(entry->lock), irql);
>           status = OvsConntrackUpdateOtherEntry(entry, reply, now);
> +        OVS_RELEASE_SPIN_LOCK(&(entry->lock), irql);
>           break;
>       default:
>           status = CT_UPDATE_INVALID;
>           break;
>       }
> -    NdisReleaseRWLock(entry->lock, &lockState);
>       return status;
>   }
>   
> @@ -431,8 +429,8 @@ OvsCtEntryDelete(POVS_CT_ENTRY entry, BOOLEAN forceDelete)
>       if (entry == NULL) {
>           return;
>       }
> -    LOCK_STATE_EX lockState;
> -    NdisAcquireRWLockWrite(entry->lock, &lockState, 0);
> +    KIRQL irql = KeGetCurrentIrql();
> +    OVS_ACQUIRE_SPIN_LOCK(&(entry->lock), irql);
>       if (forceDelete || OvsCtEntryExpired(entry)) {
>           if (entry->natInfo.natAction) {
>               LOCK_STATE_EX lockStateNat;
> @@ -442,13 +440,13 @@ OvsCtEntryDelete(POVS_CT_ENTRY entry, BOOLEAN forceDelete)
>           }
>           OvsPostCtEventEntry(entry, OVS_EVENT_CT_DELETE);
>           RemoveEntryList(&entry->link);
> -        NdisReleaseRWLock(entry->lock, &lockState);
> -        NdisFreeRWLock(entry->lock);
> +        OVS_RELEASE_SPIN_LOCK(&(entry->lock), irql);
> +        NdisFreeSpinLock(&(entry->lock));
>           OvsFreeMemoryWithTag(entry, OVS_CT_POOL_TAG);
> -        InterlockedDecrement((LONG volatile*)&ctTotalEntries);
> +        NdisInterlockedDecrement((PLONG)&ctTotalEntries);
>           return;
>       }
> -    NdisReleaseRWLock(entry->lock, &lockState);
> +    OVS_RELEASE_SPIN_LOCK(&(entry->lock), irql);
Why is the entry lock not being freed here?
>   }
>   
>   static __inline NDIS_STATUS
> @@ -499,7 +497,7 @@ OvsCtLookup(OvsConntrackKeyLookupCtx *ctx)
>       POVS_CT_ENTRY entry;
>       BOOLEAN reply = FALSE;
>       POVS_CT_ENTRY found = NULL;
> -    LOCK_STATE_EX lockState, lockStateTable;
> +    LOCK_STATE_EX lockStateTable;
>       UINT32 bucketIdx;
>       /* Reverse NAT must be performed before OvsCtLookup, so here
>        * we simply need to flip the src and dst in key and compare
> @@ -512,11 +510,14 @@ OvsCtLookup(OvsConntrackKeyLookupCtx *ctx)
>       if (!ctTotalEntries) {
>           return found;
>       }
> +
> +    KIRQL irql = KeGetCurrentIrql();
>       bucketIdx = ctx->hash & CT_HASH_TABLE_MASK;
>       NdisAcquireRWLockRead(ovsCtBucketLock[bucketIdx], &lockStateTable, 0);
>       LIST_FORALL(&ovsConntrackTable[bucketIdx], link) {
>           entry = CONTAINING_RECORD(link, OVS_CT_ENTRY, link);
> -        NdisAcquireRWLockRead(entry->lock, &lockState, 0);
> +        OVS_ACQUIRE_SPIN_LOCK(&(entry->lock), irql);
> +
>           if (OvsCtKeyAreSame(ctx->key, entry->key)) {
>               found = entry;
>               reply = FALSE;
> @@ -533,10 +534,10 @@ OvsCtLookup(OvsConntrackKeyLookupCtx *ctx)
>               } else {
>                   ctx->reply = reply;
>               }
> -            NdisReleaseRWLock(entry->lock, &lockState);
> +            OVS_RELEASE_SPIN_LOCK(&(entry->lock), irql);
>               break;
>           }
> -        NdisReleaseRWLock(entry->lock, &lockState);
> +        OVS_RELEASE_SPIN_LOCK(&(entry->lock), irql);
>       }
>   
>       NdisReleaseRWLock(ovsCtBucketLock[bucketIdx], &lockStateTable);
> @@ -689,7 +690,7 @@ OvsProcessConntrackEntry(OvsForwardingContext *fwdCtx,
>       POVS_CT_ENTRY entry = ctx->entry;
>       UINT32 state = 0;
>       PNET_BUFFER_LIST curNbl = fwdCtx->curNbl;
> -    LOCK_STATE_EX lockState, lockStateTable;
> +    LOCK_STATE_EX lockStateTable;
>       PNDIS_RW_LOCK_EX bucketLockRef = NULL;
>       *entryCreated = FALSE;
>   
> @@ -730,7 +731,8 @@ OvsProcessConntrackEntry(OvsForwardingContext *fwdCtx,
>           }
>       }
>       if (entry) {
> -        NdisAcquireRWLockRead(entry->lock, &lockState, 0);
> +        KIRQL irql = KeGetCurrentIrql();
> +        OVS_ACQUIRE_SPIN_LOCK(&(entry->lock), irql);
No need to figure out IRQL every time.
>           if (key->ipKey.nwProto == IPPROTO_TCP) {
>               /* Update the related bit if there is a parent */
>               if (entry->parent) {
> @@ -748,7 +750,7 @@ OvsProcessConntrackEntry(OvsForwardingContext *fwdCtx,
>           /* Copy mark and label from entry into flowKey. If actions specify
>              different mark and label, update the flowKey. */
>           OvsCtUpdateFlowKey(key, state, zone, entry->mark, &entry->labels);
> -        NdisReleaseRWLock(entry->lock, &lockState);
> +        OVS_RELEASE_SPIN_LOCK(&(entry->lock), irql);
>       } else {
>           OvsCtUpdateFlowKey(key, state, zone, 0, NULL);
>       }
> @@ -802,8 +804,6 @@ OvsCtSetMarkLabel(OvsFlowKey *key,
>                          MD_LABELS *labels,
>                          BOOLEAN *triggerUpdateEvent)
>   {
> -    LOCK_STATE_EX lockState;
> -    NdisAcquireRWLockWrite(entry->lock, &lockState, 0);
>       if (mark) {
>           OvsConntrackSetMark(key, entry, mark->value, mark->mask,
>                               triggerUpdateEvent);
> @@ -813,7 +813,6 @@ OvsCtSetMarkLabel(OvsFlowKey *key,
>           OvsConntrackSetLabels(key, entry, &labels->value, &labels->mask,
>                                 triggerUpdateEvent);
>       }
> -    NdisReleaseRWLock(entry->lock, &lockState);
>   }
>   
>   /*
> @@ -854,10 +853,11 @@ OvsCtExecute_(OvsForwardingContext *fwdCtx,
>   {
>       NDIS_STATUS status = NDIS_STATUS_SUCCESS;
>       BOOLEAN triggerUpdateEvent = FALSE;
> +    BOOLEAN entryCreated = FALSE;
>       POVS_CT_ENTRY entry = NULL;
>       PNET_BUFFER_LIST curNbl = fwdCtx->curNbl;
>       OvsConntrackKeyLookupCtx ctx = { 0 };
> -    LOCK_STATE_EX lockState, lockStateTable;
> +    LOCK_STATE_EX lockStateTable;
>       UINT64 currentTime;
>       NdisGetCurrentSystemTime((LARGE_INTEGER *) &currentTime);
>   
> @@ -866,10 +866,9 @@ OvsCtExecute_(OvsForwardingContext *fwdCtx,
>   
>       /* Lookup Conntrack entries for a matching entry */
>       entry = OvsCtLookup(&ctx);
> -    BOOLEAN entryCreated = FALSE;
>   
>       /* Delete entry in reverse direction if 'force' is specified */
> -    if (entry && force && ctx.reply) {
> +    if (force && ctx.reply && entry) {
>           PNDIS_RW_LOCK_EX bucketLockRef = entry->bucketLockRef;
>           NdisAcquireRWLockWrite(bucketLockRef, &lockStateTable, 0);
>           OvsCtEntryDelete(entry, TRUE);
> @@ -877,34 +876,33 @@ OvsCtExecute_(OvsForwardingContext *fwdCtx,
>           entry = NULL;
>       }
>   
> -    if (!entry && commit && ctTotalEntries >= CT_MAX_ENTRIES) {
> -        /* Don't proceed with processing if the max limit has been hit.
> -         * This blocks only new entries from being created and doesn't
> -         * affect existing connections.
> +    if (entry) {
> +        /* Increment stats for the entry if it wasn't tracked previously or
> +         * if they are on different zones
>            */
> -        OVS_LOG_ERROR("Conntrack Limit hit: %lu", ctTotalEntries);
> -        return NDIS_STATUS_RESOURCES;
> -    }
> -
> -    /* Increment stats for the entry if it wasn't tracked previously or
> -     * if they are on different zones
> -     */
> -    if (entry && (entry->key.zone != key->ct.zone ||
> -           (!(key->ct.state & OVS_CS_F_TRACKED)))) {
> -        OvsCtIncrementCounters(entry, ctx.reply, curNbl);
> -    }
> +        if ((entry->key.zone != key->ct.zone ||
> +               (!(key->ct.state & OVS_CS_F_TRACKED)))) {
> +            OvsCtIncrementCounters(entry, ctx.reply, curNbl);
> +        }
> +        /* Process the entry and update CT flags */
> +        entry = OvsProcessConntrackEntry(fwdCtx, layers->l4Offset, &ctx, key,
> +                                         zone, natInfo, commit, currentTime,
> +                                         &entryCreated);
>   
> -    if (!entry) {
> +    } else {
> +        if (commit && ctTotalEntries >= CT_MAX_ENTRIES) {
> +            /* Don't proceed with processing if the max limit has been hit.
> +             * This blocks only new entries from being created and doesn't
> +             * affect existing connections.
> +             */
> +            OVS_LOG_ERROR("Conntrack Limit hit: %lu", ctTotalEntries);
> +            return NDIS_STATUS_RESOURCES;
> +        }
>           /* If no matching entry was found, create one and add New state */
>           entry = OvsCtEntryCreate(fwdCtx, key->ipKey.nwProto,
>                                    layers->l4Offset, &ctx,
>                                    key, natInfo, commit, currentTime,
>                                    &entryCreated);
> -    } else {
> -        /* Process the entry and update CT flags */
> -        entry = OvsProcessConntrackEntry(fwdCtx, layers->l4Offset, &ctx, key,
> -                                         zone, natInfo, commit, currentTime,
> -                                         &entryCreated);
>       }
>   
>       if (entry == NULL) {
> @@ -918,6 +916,8 @@ OvsCtExecute_(OvsForwardingContext *fwdCtx,
>        * NAT_ACTION_REVERSE, yet entry->natInfo is NAT_ACTION_SRC or
>        * NAT_ACTION_DST without NAT_ACTION_REVERSE
>        */
> +    KIRQL irql = KeGetCurrentIrql();
> +    OVS_ACQUIRE_SPIN_LOCK(&(entry->lock), irql);
>       if (natInfo->natAction != NAT_ACTION_NONE)
>       {
>           LOCK_STATE_EX lockStateNat;
> @@ -939,15 +939,14 @@ OvsCtExecute_(OvsForwardingContext *fwdCtx,
>               OVS_LOG_ERROR("Error while parsing the FTP packet");
>           }
>       }
> -    NdisAcquireRWLockRead(entry->lock, &lockState, 0);
> +
>       /* Add original tuple information to flow Key */
>       if (entry->key.dl_type == ntohs(ETH_TYPE_IPV4)) {
>           if (entry->parent != NULL) {
>               POVS_CT_ENTRY parent = entry->parent;
> -            LOCK_STATE_EX lockStateParent;
> -            NdisAcquireRWLockRead(parent->lock, &lockStateParent, 0);
> +            OVS_ACQUIRE_SPIN_LOCK(&(parent->lock), irql);
>               OvsCtUpdateTuple(key, &parent->key);
> -            NdisReleaseRWLock(parent->lock, &lockStateParent);
> +            OVS_RELEASE_SPIN_LOCK(&(parent->lock), irql);
>           } else {
>               OvsCtUpdateTuple(key, &entry->key);
>           }
> @@ -955,13 +954,11 @@ OvsCtExecute_(OvsForwardingContext *fwdCtx,
>   
>       if (entryCreated) {
>           OvsPostCtEventEntry(entry, OVS_EVENT_CT_NEW);
> -    }
> -    if (postUpdateEvent && !entryCreated && triggerUpdateEvent) {
> +    } else if (postUpdateEvent && triggerUpdateEvent) {
>           OvsPostCtEventEntry(entry, OVS_EVENT_CT_UPDATE);
>       }
>   
> -    NdisReleaseRWLock(entry->lock, &lockState);
> -
> +    OVS_RELEASE_SPIN_LOCK(&(entry->lock), irql);
>       return status;
>   }
>   
> @@ -1738,8 +1735,9 @@ OvsCtDumpCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
>       UINT32 inIndex = instance->dumpState.index[1];
>       UINT32 i = CT_HASH_TABLE_SIZE;
>       UINT32 outIndex = 0;
> +    KIRQL irql = KeGetCurrentIrql();
> +    LOCK_STATE_EX lockStateTable;
>   
> -    LOCK_STATE_EX lockState, lockStateTable;
>       if (ctTotalEntries) {
>           for (i = inBucket; i < CT_HASH_TABLE_SIZE; i++) {
>               PLIST_ENTRY head, link;
> @@ -1756,7 +1754,7 @@ OvsCtDumpCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
>                    */
>                   if (outIndex >= inIndex) {
>                       entry = CONTAINING_RECORD(link, OVS_CT_ENTRY, link);
> -                    NdisAcquireRWLockRead(entry->lock, &lockState, 0);
> +                    OVS_ACQUIRE_SPIN_LOCK(&(entry->lock), irql);
>                       rc = OvsCreateNlMsgFromCtEntry(entry,
>                                                      usrParamsCtx->outputBuffer,
>                                                      usrParamsCtx->outputLength,
> @@ -1765,7 +1763,7 @@ OvsCtDumpCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
>                                                      msgIn->nlMsg.nlmsgPid,
>                                                      msgIn->nfGenMsg.version,
>                                                      0);
> -                    NdisReleaseRWLock(entry->lock, &lockState);
> +                    OVS_RELEASE_SPIN_LOCK(&(entry->lock), irql);
>                       if (rc != NDIS_STATUS_SUCCESS) {
>                           NdisReleaseRWLock(ovsCtBucketLock[i], &lockStateTable);
>                           return STATUS_UNSUCCESSFUL;
> diff --git a/datapath-windows/ovsext/Conntrack.h b/datapath-windows/ovsext/Conntrack.h
> index 3be309e..7dc92a1 100644
> --- a/datapath-windows/ovsext/Conntrack.h
> +++ b/datapath-windows/ovsext/Conntrack.h
> @@ -101,7 +101,7 @@ typedef struct _NAT_ACTION_INFO {
>   typedef struct OVS_CT_ENTRY {
>       /* Reference to ovsCtBucketLock of ovsConntrackTable.*/
>       PNDIS_RW_LOCK_EX bucketLockRef;
> -    PNDIS_RW_LOCK_EX lock;       /* Protects OVS_CT_ENTRY. */
> +    NDIS_SPIN_LOCK lock;       /* Protects OVS_CT_ENTRY. */
>       OVS_CT_KEY  key;
>       OVS_CT_KEY  rev_key;
>       UINT64      expiration;
> diff --git a/datapath-windows/ovsext/Util.h b/datapath-windows/ovsext/Util.h
> index 6f02147..a9bccf3 100644
> --- a/datapath-windows/ovsext/Util.h
> +++ b/datapath-windows/ovsext/Util.h
> @@ -105,6 +105,24 @@ VOID OvsAppendList(PLIST_ENTRY dst, PLIST_ENTRY src);
>   #define BIT16(_x)                       ((UINT16)0x1 << (_x))
>   #define BIT32(_x)                       ((UINT32)0x1 << (_x))
>   
> +#define OVS_ACQUIRE_SPIN_LOCK(_pLock, _dispatchLevel)                        \
> +{                                                                            \
> +    if (_dispatchLevel) {                                                    \
> +        NdisDprAcquireSpinLock(_pLock);                                      \
> +    } else {                                                                 \
> +        NdisAcquireSpinLock(_pLock);                                         \
> +    }                                                                        \
> +}
> +
> +#define OVS_RELEASE_SPIN_LOCK(_pLock, _dispatchLevel)                        \
> +{                                                                            \
> +    if (_dispatchLevel) {                                                    \
> +        NdisDprReleaseSpinLock(_pLock);                                      \
> +    } else {                                                                 \
> +        NdisReleaseSpinLock(_pLock);                                         \
> +    }                                                                        \
> +}
> +
>   BOOLEAN OvsCompareString(PVOID string1, PVOID string2);
>   
>   /*
Anand Kumar June 18, 2018, 7:02 p.m. UTC | #2
Hi Shashank,

Thanks for the review.
Please find my response inline.

Thanks,
Anand Kumar

From: Shashank Ram <rams@vmware.com>
Date: Monday, June 18, 2018 at 11:27 AM
To: Anand Kumar <kumaranand@vmware.com>, "dev@openvswitch.org" <dev@openvswitch.org>
Subject: Re: [ovs-dev] [PATCH v4 1/4] datapath-windows: Use spinlock instead of RW lock for ct entry




On 06/17/2018 10:37 PM, Anand Kumar wrote:

This patch mainly changes a ndis RW lock for conntrack entry to a

spinlock along with some minor refactor in conntrack. Using

spinlock instead of RW lock as RW locks causes performance hits

when acquired/released multiple times.



- Use NdisInterlockedXX wrapper api's instead of InterlockedXX.

- Update 'ctTotalRelatedEntries' using interlocked functions.

- Move conntrack lock out of NAT module.



Signed-off-by: Anand Kumar <kumaranand@vmware.com><mailto:kumaranand@vmware.com>

---

 datapath-windows/ovsext/Conntrack-nat.c     |   7 +-

 datapath-windows/ovsext/Conntrack-related.c |  17 ++--

 datapath-windows/ovsext/Conntrack.c         | 134 ++++++++++++++--------------

 datapath-windows/ovsext/Conntrack.h         |   2 +-

 datapath-windows/ovsext/Util.h              |  18 ++++

 5 files changed, 93 insertions(+), 85 deletions(-)



diff --git a/datapath-windows/ovsext/Conntrack-nat.c b/datapath-windows/ovsext/Conntrack-nat.c

index 316c946..da1814f 100644

--- a/datapath-windows/ovsext/Conntrack-nat.c

+++ b/datapath-windows/ovsext/Conntrack-nat.c

@@ -167,16 +167,13 @@ OvsNatPacket(OvsForwardingContext *ovsFwdCtx,

 {

     UINT32 natFlag;

     const struct ct_endpoint* endpoint;

-    LOCK_STATE_EX lockState;

-    /* XXX: Move conntrack locks out of NAT after implementing lock in NAT. */

-    NdisAcquireRWLockRead(entry->lock, &lockState, 0);

+

     /* When it is NAT, only entry->rev_key contains NATTED address;

        When it is unNAT, only entry->key contains the UNNATTED address;*/

     const OVS_CT_KEY *ctKey = reverse ? &entry->key : &entry->rev_key;

     BOOLEAN isSrcNat;



     if (!(natAction & (NAT_ACTION_SRC | NAT_ACTION_DST))) {

-        NdisReleaseRWLock(entry->lock, &lockState);

         return;

     }

     isSrcNat = (((natAction & NAT_ACTION_SRC) && !reverse) ||

@@ -206,7 +203,6 @@ OvsNatPacket(OvsForwardingContext *ovsFwdCtx,

         }

     } else if (ctKey->dl_type == htons(ETH_TYPE_IPV6)){

         // XXX: IPv6 packet not supported yet.

-        NdisReleaseRWLock(entry->lock, &lockState);

         return;

     }

     if (natAction & (NAT_ACTION_SRC_PORT | NAT_ACTION_DST_PORT)) {

@@ -220,7 +216,6 @@ OvsNatPacket(OvsForwardingContext *ovsFwdCtx,

             }

         }

     }

-    NdisReleaseRWLock(entry->lock, &lockState);

 }





diff --git a/datapath-windows/ovsext/Conntrack-related.c b/datapath-windows/ovsext/Conntrack-related.c

index ec4b536..b798137 100644

--- a/datapath-windows/ovsext/Conntrack-related.c

+++ b/datapath-windows/ovsext/Conntrack-related.c

@@ -18,7 +18,7 @@

 #include "Jhash.h"



 static PLIST_ENTRY ovsCtRelatedTable; /* Holds related entries */

-static UINT64 ctTotalRelatedEntries;

+static ULONG ctTotalRelatedEntries;

 static OVS_CT_THREAD_CTX ctRelThreadCtx;

 static PNDIS_RW_LOCK_EX ovsCtRelatedLockObj;

 extern POVS_SWITCH_CONTEXT gOvsSwitchContext;

@@ -75,13 +75,11 @@ OvsCtRelatedLookup(OVS_CT_KEY key, UINT64 currentTime)

     POVS_CT_REL_ENTRY entry;

     LOCK_STATE_EX lockState;



-    NdisAcquireRWLockRead(ovsCtRelatedLockObj, &lockState, 0);

-

     if (!ctTotalRelatedEntries) {

-        NdisReleaseRWLock(ovsCtRelatedLockObj, &lockState);

         return NULL;

     }



+    NdisAcquireRWLockRead(ovsCtRelatedLockObj, &lockState, 0);

     for (int i = 0; i < CT_HASH_TABLE_SIZE; i++) {

         /* XXX - Scan the table based on the hash instead */

         LIST_FORALL_SAFE(&ovsCtRelatedTable[i], link, next) {

@@ -103,7 +101,7 @@ OvsCtRelatedEntryDelete(POVS_CT_REL_ENTRY entry)

 {

     RemoveEntryList(&entry->link);

     OvsFreeMemoryWithTag(entry, OVS_CT_POOL_TAG);

-    ctTotalRelatedEntries--;

+    NdisInterlockedDecrement((PLONG)&ctTotalRelatedEntries);

 }



 NDIS_STATUS

@@ -139,7 +137,7 @@ OvsCtRelatedEntryCreate(UINT8 ipProto,

     NdisAcquireRWLockWrite(ovsCtRelatedLockObj, &lockState, 0);

     InsertHeadList(&ovsCtRelatedTable[hash & CT_HASH_TABLE_MASK],

                    &entry->link);

-    ctTotalRelatedEntries++;

+    NdisInterlockedIncrement((PLONG)&ctTotalRelatedEntries);

     NdisReleaseRWLock(ovsCtRelatedLockObj, &lockState);



     return NDIS_STATUS_SUCCESS;

@@ -150,11 +148,10 @@ OvsCtRelatedFlush()

 {

     PLIST_ENTRY link, next;

     POVS_CT_REL_ENTRY entry;

-

     LOCK_STATE_EX lockState;

-    NdisAcquireRWLockWrite(ovsCtRelatedLockObj, &lockState, 0);



     if (ctTotalRelatedEntries) {

+        NdisAcquireRWLockWrite(ovsCtRelatedLockObj, &lockState, 0);

         for (int i = 0; i < CT_HASH_TABLE_SIZE; i++) {

             LIST_FORALL_SAFE(&ovsCtRelatedTable[i], link, next) {

                 entry = CONTAINING_RECORD(link, OVS_CT_REL_ENTRY, link);

@@ -189,9 +186,8 @@ OvsCtRelatedEntryCleaner(PVOID data)

             /* Lock has been freed by 'OvsCleanupCtRelated()' */

             break;

         }

-        NdisAcquireRWLockWrite(ovsCtRelatedLockObj, &lockState, 0);

+

         if (context->exit) {

-            NdisReleaseRWLock(ovsCtRelatedLockObj, &lockState);

             break;

         }



@@ -201,6 +197,7 @@ OvsCtRelatedEntryCleaner(PVOID data)

         threadSleepTimeout = currentTime + CT_CLEANUP_INTERVAL;



         if (ctTotalRelatedEntries) {

+            NdisAcquireRWLockWrite(ovsCtRelatedLockObj, &lockState, 0);

             for (int i = 0; i < CT_HASH_TABLE_SIZE; i++) {

                 LIST_FORALL_SAFE(&ovsCtRelatedTable[i], link, next) {

                     entry = CONTAINING_RECORD(link, OVS_CT_REL_ENTRY, link);

diff --git a/datapath-windows/ovsext/Conntrack.c b/datapath-windows/ovsext/Conntrack.c

index add1491..7b54fba 100644

--- a/datapath-windows/ovsext/Conntrack.c

+++ b/datapath-windows/ovsext/Conntrack.c

@@ -34,7 +34,7 @@ static OVS_CT_THREAD_CTX ctThreadCtx;

 static PNDIS_RW_LOCK_EX *ovsCtBucketLock = NULL;

 static PNDIS_RW_LOCK_EX ovsCtNatLockObj;

 extern POVS_SWITCH_CONTEXT gOvsSwitchContext;

-static LONG ctTotalEntries;

+static ULONG ctTotalEntries;



 static __inline OvsCtFlush(UINT16 zone, struct ovs_key_ct_tuple_ipv4 *tuple);

 static __inline NDIS_STATUS

@@ -208,7 +208,6 @@ OvsPostCtEventEntry(POVS_CT_ENTRY entry, UINT8 type)

 {

     OVS_CT_EVENT_ENTRY ctEventEntry = {0};

     NdisMoveMemory(&ctEventEntry.entry, entry, sizeof(OVS_CT_ENTRY));

-    ctEventEntry.entry.lock = NULL;

     ctEventEntry.entry.parent = NULL;

     ctEventEntry.type = type;

     OvsPostCtEvent(&ctEventEntry);

@@ -217,8 +216,8 @@ OvsPostCtEventEntry(POVS_CT_ENTRY entry, UINT8 type)

 static __inline VOID

 OvsCtIncrementCounters(POVS_CT_ENTRY entry, BOOLEAN reply, PNET_BUFFER_LIST nbl)

 {

-    LOCK_STATE_EX lockState;

-    NdisAcquireRWLockWrite(entry->lock, &lockState, 0);

+    KIRQL irql = KeGetCurrentIrql();

+    OVS_ACQUIRE_SPIN_LOCK(&(entry->lock), irql);

     if (reply) {

         entry->rev_key.byteCount+= OvsPacketLenNBL(nbl);

         entry->rev_key.packetCount++;

@@ -226,11 +225,11 @@ OvsCtIncrementCounters(POVS_CT_ENTRY entry, BOOLEAN reply, PNET_BUFFER_LIST nbl)

         entry->key.byteCount += OvsPacketLenNBL(nbl);

         entry->key.packetCount++;

     }

-    NdisReleaseRWLock(entry->lock, &lockState);

+    OVS_RELEASE_SPIN_LOCK(&(entry->lock), irql);
Can we simply use NdisInterlocked functions instead of having a lock to perform increments?
Also, in case the lock is only being acquired once within a function, there is no need to calculate the dispatch level, let NDIS do it. Calculating the dispatch level helps only in cases where the locks are being acquired multiple times within a given context.
[AK]: The stats are part of ct entry structure, we need lock to make sure entry is not being modified by some other thread.
Regarding calculating the dispatch level for lock, I did this to keep the code consistent in conntrack.

 }



 static __inline BOOLEAN

-OvsCtAddEntry(POVS_SWITCH_CONTEXT switchContext, POVS_CT_ENTRY entry,

+OvsCtAddEntry(POVS_CT_ENTRY entry,

               OvsConntrackKeyLookupCtx *ctx,

               PNAT_ACTION_INFO natInfo, UINT64 now)

 {

@@ -261,18 +260,14 @@ OvsCtAddEntry(POVS_SWITCH_CONTEXT switchContext, POVS_CT_ENTRY entry,

     }



     entry->timestampStart = now;

-    entry->lock = NdisAllocateRWLock(switchContext->NdisFilterHandle);

-    if (entry->lock == NULL) {

-        OVS_LOG_ERROR("NdisAllocateRWLock failed : Insufficient resources");

-        return FALSE;

-    }

+    NdisAllocateSpinLock(&(entry->lock));

     UINT32 bucketIdx = ctx->hash & CT_HASH_TABLE_MASK;

     entry->bucketLockRef = ovsCtBucketLock[bucketIdx];

     NdisAcquireRWLockWrite(ovsCtBucketLock[bucketIdx], &lockState, 0);

     InsertHeadList(&ovsConntrackTable[bucketIdx],

                    &entry->link);



-    InterlockedIncrement((LONG volatile *)&ctTotalEntries);

+    NdisInterlockedIncrement((PLONG)&ctTotalEntries);

     NdisReleaseRWLock(ovsCtBucketLock[bucketIdx], &lockState);

     return TRUE;

 }

@@ -351,8 +346,7 @@ OvsCtEntryCreate(OvsForwardingContext *fwdCtx,

     if (state != OVS_CS_F_INVALID && commit) {

         if (entry) {

             entry->parent = parentEntry;

-            if (OvsCtAddEntry(fwdCtx->switchContext, entry, ctx,

-                              natInfo, currentTime)) {

+            if (OvsCtAddEntry(entry, ctx, natInfo, currentTime)) {

                 *entryCreated = TRUE;

             } else {

                 /* Unable to add entry to the list */

@@ -382,8 +376,7 @@ OvsCtUpdateEntry(OVS_CT_ENTRY* entry,

                  UINT64 now)

 {

     CT_UPDATE_RES status;

-    LOCK_STATE_EX lockState;

-    NdisAcquireRWLockWrite(entry->lock, &lockState, 0);

+    KIRQL irql = KeGetCurrentIrql();

     switch (ipProto) {

     case IPPROTO_TCP:

     {

@@ -394,20 +387,25 @@ OvsCtUpdateEntry(OVS_CT_ENTRY* entry,

             status = CT_UPDATE_INVALID;

             break;

         }

+        OVS_ACQUIRE_SPIN_LOCK(&(entry->lock), irql);

         status = OvsConntrackUpdateTcpEntry(entry, tcp, nbl, reply, now);

+        OVS_RELEASE_SPIN_LOCK(&(entry->lock), irql);

         break;

     }
Could you fix the style here for consistency by removing the braces for the case above.
[AK]: Done

     case IPPROTO_ICMP:

+        OVS_ACQUIRE_SPIN_LOCK(&(entry->lock), irql);

         status = OvsConntrackUpdateIcmpEntry(entry, reply, now);

+        OVS_RELEASE_SPIN_LOCK(&(entry->lock), irql);

         break;

     case IPPROTO_UDP:

+        OVS_ACQUIRE_SPIN_LOCK(&(entry->lock), irql);

         status = OvsConntrackUpdateOtherEntry(entry, reply, now);

+        OVS_RELEASE_SPIN_LOCK(&(entry->lock), irql);

         break;

     default:

         status = CT_UPDATE_INVALID;

         break;

     }

-    NdisReleaseRWLock(entry->lock, &lockState);

     return status;

 }



@@ -431,8 +429,8 @@ OvsCtEntryDelete(POVS_CT_ENTRY entry, BOOLEAN forceDelete)

     if (entry == NULL) {

         return;

     }

-    LOCK_STATE_EX lockState;

-    NdisAcquireRWLockWrite(entry->lock, &lockState, 0);

+    KIRQL irql = KeGetCurrentIrql();

+    OVS_ACQUIRE_SPIN_LOCK(&(entry->lock), irql);

     if (forceDelete || OvsCtEntryExpired(entry)) {

         if (entry->natInfo.natAction) {

             LOCK_STATE_EX lockStateNat;

@@ -442,13 +440,13 @@ OvsCtEntryDelete(POVS_CT_ENTRY entry, BOOLEAN forceDelete)

         }

         OvsPostCtEventEntry(entry, OVS_EVENT_CT_DELETE);

         RemoveEntryList(&entry->link);

-        NdisReleaseRWLock(entry->lock, &lockState);

-        NdisFreeRWLock(entry->lock);

+        OVS_RELEASE_SPIN_LOCK(&(entry->lock), irql);

+        NdisFreeSpinLock(&(entry->lock));

         OvsFreeMemoryWithTag(entry, OVS_CT_POOL_TAG);

-        InterlockedDecrement((LONG volatile*)&ctTotalEntries);

+        NdisInterlockedDecrement((PLONG)&ctTotalEntries);

         return;

     }

-    NdisReleaseRWLock(entry->lock, &lockState);

+    OVS_RELEASE_SPIN_LOCK(&(entry->lock), irql);
Why is the entry lock not being freed here?
[AK]: Entry lock will be freed only when entry is deleted.

 }



 static __inline NDIS_STATUS

@@ -499,7 +497,7 @@ OvsCtLookup(OvsConntrackKeyLookupCtx *ctx)

     POVS_CT_ENTRY entry;

     BOOLEAN reply = FALSE;

     POVS_CT_ENTRY found = NULL;

-    LOCK_STATE_EX lockState, lockStateTable;

+    LOCK_STATE_EX lockStateTable;

     UINT32 bucketIdx;

     /* Reverse NAT must be performed before OvsCtLookup, so here

      * we simply need to flip the src and dst in key and compare

@@ -512,11 +510,14 @@ OvsCtLookup(OvsConntrackKeyLookupCtx *ctx)

     if (!ctTotalEntries) {

         return found;

     }

+

+    KIRQL irql = KeGetCurrentIrql();

     bucketIdx = ctx->hash & CT_HASH_TABLE_MASK;

     NdisAcquireRWLockRead(ovsCtBucketLock[bucketIdx], &lockStateTable, 0);

     LIST_FORALL(&ovsConntrackTable[bucketIdx], link) {

         entry = CONTAINING_RECORD(link, OVS_CT_ENTRY, link);

-        NdisAcquireRWLockRead(entry->lock, &lockState, 0);

+        OVS_ACQUIRE_SPIN_LOCK(&(entry->lock), irql);

+

         if (OvsCtKeyAreSame(ctx->key, entry->key)) {

             found = entry;

             reply = FALSE;

@@ -533,10 +534,10 @@ OvsCtLookup(OvsConntrackKeyLookupCtx *ctx)

             } else {

                 ctx->reply = reply;

             }

-            NdisReleaseRWLock(entry->lock, &lockState);

+            OVS_RELEASE_SPIN_LOCK(&(entry->lock), irql);

             break;

         }

-        NdisReleaseRWLock(entry->lock, &lockState);

+        OVS_RELEASE_SPIN_LOCK(&(entry->lock), irql);

     }



     NdisReleaseRWLock(ovsCtBucketLock[bucketIdx], &lockStateTable);

@@ -689,7 +690,7 @@ OvsProcessConntrackEntry(OvsForwardingContext *fwdCtx,

     POVS_CT_ENTRY entry = ctx->entry;

     UINT32 state = 0;

     PNET_BUFFER_LIST curNbl = fwdCtx->curNbl;

-    LOCK_STATE_EX lockState, lockStateTable;

+    LOCK_STATE_EX lockStateTable;

     PNDIS_RW_LOCK_EX bucketLockRef = NULL;

     *entryCreated = FALSE;



@@ -730,7 +731,8 @@ OvsProcessConntrackEntry(OvsForwardingContext *fwdCtx,

         }

     }

     if (entry) {

-        NdisAcquireRWLockRead(entry->lock, &lockState, 0);

+        KIRQL irql = KeGetCurrentIrql();

+        OVS_ACQUIRE_SPIN_LOCK(&(entry->lock), irql);
No need to figure out IRQL every time.
[AK]: I have kept it so that code is consistent in conntrack.

         if (key->ipKey.nwProto == IPPROTO_TCP) {

             /* Update the related bit if there is a parent */

             if (entry->parent) {

@@ -748,7 +750,7 @@ OvsProcessConntrackEntry(OvsForwardingContext *fwdCtx,

         /* Copy mark and label from entry into flowKey. If actions specify

            different mark and label, update the flowKey. */

         OvsCtUpdateFlowKey(key, state, zone, entry->mark, &entry->labels);

-        NdisReleaseRWLock(entry->lock, &lockState);

+        OVS_RELEASE_SPIN_LOCK(&(entry->lock), irql);

     } else {

         OvsCtUpdateFlowKey(key, state, zone, 0, NULL);

     }

@@ -802,8 +804,6 @@ OvsCtSetMarkLabel(OvsFlowKey *key,

                        MD_LABELS *labels,

                        BOOLEAN *triggerUpdateEvent)

 {

-    LOCK_STATE_EX lockState;

-    NdisAcquireRWLockWrite(entry->lock, &lockState, 0);

     if (mark) {

         OvsConntrackSetMark(key, entry, mark->value, mark->mask,

                             triggerUpdateEvent);

@@ -813,7 +813,6 @@ OvsCtSetMarkLabel(OvsFlowKey *key,

         OvsConntrackSetLabels(key, entry, &labels->value, &labels->mask,

                               triggerUpdateEvent);

     }

-    NdisReleaseRWLock(entry->lock, &lockState);

 }



 /*

@@ -854,10 +853,11 @@ OvsCtExecute_(OvsForwardingContext *fwdCtx,

 {

     NDIS_STATUS status = NDIS_STATUS_SUCCESS;

     BOOLEAN triggerUpdateEvent = FALSE;

+    BOOLEAN entryCreated = FALSE;

     POVS_CT_ENTRY entry = NULL;

     PNET_BUFFER_LIST curNbl = fwdCtx->curNbl;

     OvsConntrackKeyLookupCtx ctx = { 0 };

-    LOCK_STATE_EX lockState, lockStateTable;

+    LOCK_STATE_EX lockStateTable;

     UINT64 currentTime;

     NdisGetCurrentSystemTime((LARGE_INTEGER *) &currentTime);



@@ -866,10 +866,9 @@ OvsCtExecute_(OvsForwardingContext *fwdCtx,



     /* Lookup Conntrack entries for a matching entry */

     entry = OvsCtLookup(&ctx);

-    BOOLEAN entryCreated = FALSE;



     /* Delete entry in reverse direction if 'force' is specified */

-    if (entry && force && ctx.reply) {

+    if (force && ctx.reply && entry) {

         PNDIS_RW_LOCK_EX bucketLockRef = entry->bucketLockRef;

         NdisAcquireRWLockWrite(bucketLockRef, &lockStateTable, 0);

         OvsCtEntryDelete(entry, TRUE);

@@ -877,34 +876,33 @@ OvsCtExecute_(OvsForwardingContext *fwdCtx,

         entry = NULL;

     }



-    if (!entry && commit && ctTotalEntries >= CT_MAX_ENTRIES) {

-        /* Don't proceed with processing if the max limit has been hit.

-         * This blocks only new entries from being created and doesn't

-         * affect existing connections.

+    if (entry) {

+        /* Increment stats for the entry if it wasn't tracked previously or

+         * if they are on different zones

          */

-        OVS_LOG_ERROR("Conntrack Limit hit: %lu", ctTotalEntries);

-        return NDIS_STATUS_RESOURCES;

-    }

-

-    /* Increment stats for the entry if it wasn't tracked previously or

-     * if they are on different zones

-     */

-    if (entry && (entry->key.zone != key->ct.zone ||

-           (!(key->ct.state & OVS_CS_F_TRACKED)))) {

-        OvsCtIncrementCounters(entry, ctx.reply, curNbl);

-    }

+        if ((entry->key.zone != key->ct.zone ||

+               (!(key->ct.state & OVS_CS_F_TRACKED)))) {

+            OvsCtIncrementCounters(entry, ctx.reply, curNbl);

+        }

+        /* Process the entry and update CT flags */

+        entry = OvsProcessConntrackEntry(fwdCtx, layers->l4Offset, &ctx, key,

+                                         zone, natInfo, commit, currentTime,

+                                         &entryCreated);



-    if (!entry) {

+    } else {

+        if (commit && ctTotalEntries >= CT_MAX_ENTRIES) {

+            /* Don't proceed with processing if the max limit has been hit.

+             * This blocks only new entries from being created and doesn't

+             * affect existing connections.

+             */

+            OVS_LOG_ERROR("Conntrack Limit hit: %lu", ctTotalEntries);

+            return NDIS_STATUS_RESOURCES;

+        }

         /* If no matching entry was found, create one and add New state */

         entry = OvsCtEntryCreate(fwdCtx, key->ipKey.nwProto,

                                  layers->l4Offset, &ctx,

                                  key, natInfo, commit, currentTime,

                                  &entryCreated);

-    } else {

-        /* Process the entry and update CT flags */

-        entry = OvsProcessConntrackEntry(fwdCtx, layers->l4Offset, &ctx, key,

-                                         zone, natInfo, commit, currentTime,

-                                         &entryCreated);

     }



     if (entry == NULL) {

@@ -918,6 +916,8 @@ OvsCtExecute_(OvsForwardingContext *fwdCtx,

      * NAT_ACTION_REVERSE, yet entry->natInfo is NAT_ACTION_SRC or

      * NAT_ACTION_DST without NAT_ACTION_REVERSE

      */

+    KIRQL irql = KeGetCurrentIrql();

+    OVS_ACQUIRE_SPIN_LOCK(&(entry->lock), irql);

     if (natInfo->natAction != NAT_ACTION_NONE)

     {

         LOCK_STATE_EX lockStateNat;

@@ -939,15 +939,14 @@ OvsCtExecute_(OvsForwardingContext *fwdCtx,

             OVS_LOG_ERROR("Error while parsing the FTP packet");

         }

     }

-    NdisAcquireRWLockRead(entry->lock, &lockState, 0);

+

     /* Add original tuple information to flow Key */

     if (entry->key.dl_type == ntohs(ETH_TYPE_IPV4)) {

         if (entry->parent != NULL) {

             POVS_CT_ENTRY parent = entry->parent;

-            LOCK_STATE_EX lockStateParent;

-            NdisAcquireRWLockRead(parent->lock, &lockStateParent, 0);

+            OVS_ACQUIRE_SPIN_LOCK(&(parent->lock), irql);

             OvsCtUpdateTuple(key, &parent->key);

-            NdisReleaseRWLock(parent->lock, &lockStateParent);

+            OVS_RELEASE_SPIN_LOCK(&(parent->lock), irql);

         } else {

             OvsCtUpdateTuple(key, &entry->key);

         }

@@ -955,13 +954,11 @@ OvsCtExecute_(OvsForwardingContext *fwdCtx,



     if (entryCreated) {

         OvsPostCtEventEntry(entry, OVS_EVENT_CT_NEW);

-    }

-    if (postUpdateEvent && !entryCreated && triggerUpdateEvent) {

+    } else if (postUpdateEvent && triggerUpdateEvent) {

         OvsPostCtEventEntry(entry, OVS_EVENT_CT_UPDATE);

     }



-    NdisReleaseRWLock(entry->lock, &lockState);

-

+    OVS_RELEASE_SPIN_LOCK(&(entry->lock), irql);

     return status;

 }



@@ -1738,8 +1735,9 @@ OvsCtDumpCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,

     UINT32 inIndex = instance->dumpState.index[1];

     UINT32 i = CT_HASH_TABLE_SIZE;

     UINT32 outIndex = 0;

+    KIRQL irql = KeGetCurrentIrql();

+    LOCK_STATE_EX lockStateTable;



-    LOCK_STATE_EX lockState, lockStateTable;

     if (ctTotalEntries) {

         for (i = inBucket; i < CT_HASH_TABLE_SIZE; i++) {

             PLIST_ENTRY head, link;

@@ -1756,7 +1754,7 @@ OvsCtDumpCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,

                  */

                 if (outIndex >= inIndex) {

                     entry = CONTAINING_RECORD(link, OVS_CT_ENTRY, link);

-                    NdisAcquireRWLockRead(entry->lock, &lockState, 0);

+                    OVS_ACQUIRE_SPIN_LOCK(&(entry->lock), irql);

                     rc = OvsCreateNlMsgFromCtEntry(entry,

                                                    usrParamsCtx->outputBuffer,

                                                    usrParamsCtx->outputLength,

@@ -1765,7 +1763,7 @@ OvsCtDumpCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,

                                                    msgIn->nlMsg.nlmsgPid,

                                                    msgIn->nfGenMsg.version,

                                                    0);

-                    NdisReleaseRWLock(entry->lock, &lockState);

+                    OVS_RELEASE_SPIN_LOCK(&(entry->lock), irql);

                     if (rc != NDIS_STATUS_SUCCESS) {

                         NdisReleaseRWLock(ovsCtBucketLock[i], &lockStateTable);

                         return STATUS_UNSUCCESSFUL;

diff --git a/datapath-windows/ovsext/Conntrack.h b/datapath-windows/ovsext/Conntrack.h

index 3be309e..7dc92a1 100644

--- a/datapath-windows/ovsext/Conntrack.h

+++ b/datapath-windows/ovsext/Conntrack.h

@@ -101,7 +101,7 @@ typedef struct _NAT_ACTION_INFO {

 typedef struct OVS_CT_ENTRY {

     /* Reference to ovsCtBucketLock of ovsConntrackTable.*/

     PNDIS_RW_LOCK_EX bucketLockRef;

-    PNDIS_RW_LOCK_EX lock;       /* Protects OVS_CT_ENTRY. */

+    NDIS_SPIN_LOCK lock;       /* Protects OVS_CT_ENTRY. */

     OVS_CT_KEY  key;

     OVS_CT_KEY  rev_key;

     UINT64      expiration;

diff --git a/datapath-windows/ovsext/Util.h b/datapath-windows/ovsext/Util.h

index 6f02147..a9bccf3 100644

--- a/datapath-windows/ovsext/Util.h

+++ b/datapath-windows/ovsext/Util.h

@@ -105,6 +105,24 @@ VOID OvsAppendList(PLIST_ENTRY dst, PLIST_ENTRY src);

 #define BIT16(_x)                       ((UINT16)0x1 << (_x))

 #define BIT32(_x)                       ((UINT32)0x1 << (_x))



+#define OVS_ACQUIRE_SPIN_LOCK(_pLock, _dispatchLevel)                        \

+{                                                                            \

+    if (_dispatchLevel) {                                                    \

+        NdisDprAcquireSpinLock(_pLock);                                      \

+    } else {                                                                 \

+        NdisAcquireSpinLock(_pLock);                                         \

+    }                                                                        \

+}

+

+#define OVS_RELEASE_SPIN_LOCK(_pLock, _dispatchLevel)                        \

+{                                                                            \

+    if (_dispatchLevel) {                                                    \

+        NdisDprReleaseSpinLock(_pLock);                                      \

+    } else {                                                                 \

+        NdisReleaseSpinLock(_pLock);                                         \

+    }                                                                        \

+}

+

 BOOLEAN OvsCompareString(PVOID string1, PVOID string2);



 /*
diff mbox series

Patch

diff --git a/datapath-windows/ovsext/Conntrack-nat.c b/datapath-windows/ovsext/Conntrack-nat.c
index 316c946..da1814f 100644
--- a/datapath-windows/ovsext/Conntrack-nat.c
+++ b/datapath-windows/ovsext/Conntrack-nat.c
@@ -167,16 +167,13 @@  OvsNatPacket(OvsForwardingContext *ovsFwdCtx,
 {
     UINT32 natFlag;
     const struct ct_endpoint* endpoint;
-    LOCK_STATE_EX lockState;
-    /* XXX: Move conntrack locks out of NAT after implementing lock in NAT. */
-    NdisAcquireRWLockRead(entry->lock, &lockState, 0);
+
     /* When it is NAT, only entry->rev_key contains NATTED address;
        When it is unNAT, only entry->key contains the UNNATTED address;*/
     const OVS_CT_KEY *ctKey = reverse ? &entry->key : &entry->rev_key;
     BOOLEAN isSrcNat;
 
     if (!(natAction & (NAT_ACTION_SRC | NAT_ACTION_DST))) {
-        NdisReleaseRWLock(entry->lock, &lockState);
         return;
     }
     isSrcNat = (((natAction & NAT_ACTION_SRC) && !reverse) ||
@@ -206,7 +203,6 @@  OvsNatPacket(OvsForwardingContext *ovsFwdCtx,
         }
     } else if (ctKey->dl_type == htons(ETH_TYPE_IPV6)){
         // XXX: IPv6 packet not supported yet.
-        NdisReleaseRWLock(entry->lock, &lockState);
         return;
     }
     if (natAction & (NAT_ACTION_SRC_PORT | NAT_ACTION_DST_PORT)) {
@@ -220,7 +216,6 @@  OvsNatPacket(OvsForwardingContext *ovsFwdCtx,
             }
         }
     }
-    NdisReleaseRWLock(entry->lock, &lockState);
 }
 
 
diff --git a/datapath-windows/ovsext/Conntrack-related.c b/datapath-windows/ovsext/Conntrack-related.c
index ec4b536..b798137 100644
--- a/datapath-windows/ovsext/Conntrack-related.c
+++ b/datapath-windows/ovsext/Conntrack-related.c
@@ -18,7 +18,7 @@ 
 #include "Jhash.h"
 
 static PLIST_ENTRY ovsCtRelatedTable; /* Holds related entries */
-static UINT64 ctTotalRelatedEntries;
+static ULONG ctTotalRelatedEntries;
 static OVS_CT_THREAD_CTX ctRelThreadCtx;
 static PNDIS_RW_LOCK_EX ovsCtRelatedLockObj;
 extern POVS_SWITCH_CONTEXT gOvsSwitchContext;
@@ -75,13 +75,11 @@  OvsCtRelatedLookup(OVS_CT_KEY key, UINT64 currentTime)
     POVS_CT_REL_ENTRY entry;
     LOCK_STATE_EX lockState;
 
-    NdisAcquireRWLockRead(ovsCtRelatedLockObj, &lockState, 0);
-
     if (!ctTotalRelatedEntries) {
-        NdisReleaseRWLock(ovsCtRelatedLockObj, &lockState);
         return NULL;
     }
 
+    NdisAcquireRWLockRead(ovsCtRelatedLockObj, &lockState, 0);
     for (int i = 0; i < CT_HASH_TABLE_SIZE; i++) {
         /* XXX - Scan the table based on the hash instead */
         LIST_FORALL_SAFE(&ovsCtRelatedTable[i], link, next) {
@@ -103,7 +101,7 @@  OvsCtRelatedEntryDelete(POVS_CT_REL_ENTRY entry)
 {
     RemoveEntryList(&entry->link);
     OvsFreeMemoryWithTag(entry, OVS_CT_POOL_TAG);
-    ctTotalRelatedEntries--;
+    NdisInterlockedDecrement((PLONG)&ctTotalRelatedEntries);
 }
 
 NDIS_STATUS
@@ -139,7 +137,7 @@  OvsCtRelatedEntryCreate(UINT8 ipProto,
     NdisAcquireRWLockWrite(ovsCtRelatedLockObj, &lockState, 0);
     InsertHeadList(&ovsCtRelatedTable[hash & CT_HASH_TABLE_MASK],
                    &entry->link);
-    ctTotalRelatedEntries++;
+    NdisInterlockedIncrement((PLONG)&ctTotalRelatedEntries);
     NdisReleaseRWLock(ovsCtRelatedLockObj, &lockState);
 
     return NDIS_STATUS_SUCCESS;
@@ -150,11 +148,10 @@  OvsCtRelatedFlush()
 {
     PLIST_ENTRY link, next;
     POVS_CT_REL_ENTRY entry;
-
     LOCK_STATE_EX lockState;
-    NdisAcquireRWLockWrite(ovsCtRelatedLockObj, &lockState, 0);
 
     if (ctTotalRelatedEntries) {
+        NdisAcquireRWLockWrite(ovsCtRelatedLockObj, &lockState, 0);
         for (int i = 0; i < CT_HASH_TABLE_SIZE; i++) {
             LIST_FORALL_SAFE(&ovsCtRelatedTable[i], link, next) {
                 entry = CONTAINING_RECORD(link, OVS_CT_REL_ENTRY, link);
@@ -189,9 +186,8 @@  OvsCtRelatedEntryCleaner(PVOID data)
             /* Lock has been freed by 'OvsCleanupCtRelated()' */
             break;
         }
-        NdisAcquireRWLockWrite(ovsCtRelatedLockObj, &lockState, 0);
+
         if (context->exit) {
-            NdisReleaseRWLock(ovsCtRelatedLockObj, &lockState);
             break;
         }
 
@@ -201,6 +197,7 @@  OvsCtRelatedEntryCleaner(PVOID data)
         threadSleepTimeout = currentTime + CT_CLEANUP_INTERVAL;
 
         if (ctTotalRelatedEntries) {
+            NdisAcquireRWLockWrite(ovsCtRelatedLockObj, &lockState, 0);
             for (int i = 0; i < CT_HASH_TABLE_SIZE; i++) {
                 LIST_FORALL_SAFE(&ovsCtRelatedTable[i], link, next) {
                     entry = CONTAINING_RECORD(link, OVS_CT_REL_ENTRY, link);
diff --git a/datapath-windows/ovsext/Conntrack.c b/datapath-windows/ovsext/Conntrack.c
index add1491..7b54fba 100644
--- a/datapath-windows/ovsext/Conntrack.c
+++ b/datapath-windows/ovsext/Conntrack.c
@@ -34,7 +34,7 @@  static OVS_CT_THREAD_CTX ctThreadCtx;
 static PNDIS_RW_LOCK_EX *ovsCtBucketLock = NULL;
 static PNDIS_RW_LOCK_EX ovsCtNatLockObj;
 extern POVS_SWITCH_CONTEXT gOvsSwitchContext;
-static LONG ctTotalEntries;
+static ULONG ctTotalEntries;
 
 static __inline OvsCtFlush(UINT16 zone, struct ovs_key_ct_tuple_ipv4 *tuple);
 static __inline NDIS_STATUS
@@ -208,7 +208,6 @@  OvsPostCtEventEntry(POVS_CT_ENTRY entry, UINT8 type)
 {
     OVS_CT_EVENT_ENTRY ctEventEntry = {0};
     NdisMoveMemory(&ctEventEntry.entry, entry, sizeof(OVS_CT_ENTRY));
-    ctEventEntry.entry.lock = NULL;
     ctEventEntry.entry.parent = NULL;
     ctEventEntry.type = type;
     OvsPostCtEvent(&ctEventEntry);
@@ -217,8 +216,8 @@  OvsPostCtEventEntry(POVS_CT_ENTRY entry, UINT8 type)
 static __inline VOID
 OvsCtIncrementCounters(POVS_CT_ENTRY entry, BOOLEAN reply, PNET_BUFFER_LIST nbl)
 {
-    LOCK_STATE_EX lockState;
-    NdisAcquireRWLockWrite(entry->lock, &lockState, 0);
+    KIRQL irql = KeGetCurrentIrql();
+    OVS_ACQUIRE_SPIN_LOCK(&(entry->lock), irql);
     if (reply) {
         entry->rev_key.byteCount+= OvsPacketLenNBL(nbl);
         entry->rev_key.packetCount++;
@@ -226,11 +225,11 @@  OvsCtIncrementCounters(POVS_CT_ENTRY entry, BOOLEAN reply, PNET_BUFFER_LIST nbl)
         entry->key.byteCount += OvsPacketLenNBL(nbl);
         entry->key.packetCount++;
     }
-    NdisReleaseRWLock(entry->lock, &lockState);
+    OVS_RELEASE_SPIN_LOCK(&(entry->lock), irql);
 }
 
 static __inline BOOLEAN
-OvsCtAddEntry(POVS_SWITCH_CONTEXT switchContext, POVS_CT_ENTRY entry,
+OvsCtAddEntry(POVS_CT_ENTRY entry,
               OvsConntrackKeyLookupCtx *ctx,
               PNAT_ACTION_INFO natInfo, UINT64 now)
 {
@@ -261,18 +260,14 @@  OvsCtAddEntry(POVS_SWITCH_CONTEXT switchContext, POVS_CT_ENTRY entry,
     }
 
     entry->timestampStart = now;
-    entry->lock = NdisAllocateRWLock(switchContext->NdisFilterHandle);
-    if (entry->lock == NULL) {
-        OVS_LOG_ERROR("NdisAllocateRWLock failed : Insufficient resources");
-        return FALSE;
-    }
+    NdisAllocateSpinLock(&(entry->lock));
     UINT32 bucketIdx = ctx->hash & CT_HASH_TABLE_MASK;
     entry->bucketLockRef = ovsCtBucketLock[bucketIdx];
     NdisAcquireRWLockWrite(ovsCtBucketLock[bucketIdx], &lockState, 0);
     InsertHeadList(&ovsConntrackTable[bucketIdx],
                    &entry->link);
 
-    InterlockedIncrement((LONG volatile *)&ctTotalEntries);
+    NdisInterlockedIncrement((PLONG)&ctTotalEntries);
     NdisReleaseRWLock(ovsCtBucketLock[bucketIdx], &lockState);
     return TRUE;
 }
@@ -351,8 +346,7 @@  OvsCtEntryCreate(OvsForwardingContext *fwdCtx,
     if (state != OVS_CS_F_INVALID && commit) {
         if (entry) {
             entry->parent = parentEntry;
-            if (OvsCtAddEntry(fwdCtx->switchContext, entry, ctx,
-                              natInfo, currentTime)) {
+            if (OvsCtAddEntry(entry, ctx, natInfo, currentTime)) {
                 *entryCreated = TRUE;
             } else {
                 /* Unable to add entry to the list */
@@ -382,8 +376,7 @@  OvsCtUpdateEntry(OVS_CT_ENTRY* entry,
                  UINT64 now)
 {
     CT_UPDATE_RES status;
-    LOCK_STATE_EX lockState;
-    NdisAcquireRWLockWrite(entry->lock, &lockState, 0);
+    KIRQL irql = KeGetCurrentIrql();
     switch (ipProto) {
     case IPPROTO_TCP:
     {
@@ -394,20 +387,25 @@  OvsCtUpdateEntry(OVS_CT_ENTRY* entry,
             status = CT_UPDATE_INVALID;
             break;
         }
+        OVS_ACQUIRE_SPIN_LOCK(&(entry->lock), irql);
         status = OvsConntrackUpdateTcpEntry(entry, tcp, nbl, reply, now);
+        OVS_RELEASE_SPIN_LOCK(&(entry->lock), irql);
         break;
     }
     case IPPROTO_ICMP:
+        OVS_ACQUIRE_SPIN_LOCK(&(entry->lock), irql);
         status = OvsConntrackUpdateIcmpEntry(entry, reply, now);
+        OVS_RELEASE_SPIN_LOCK(&(entry->lock), irql);
         break;
     case IPPROTO_UDP:
+        OVS_ACQUIRE_SPIN_LOCK(&(entry->lock), irql);
         status = OvsConntrackUpdateOtherEntry(entry, reply, now);
+        OVS_RELEASE_SPIN_LOCK(&(entry->lock), irql);
         break;
     default:
         status = CT_UPDATE_INVALID;
         break;
     }
-    NdisReleaseRWLock(entry->lock, &lockState);
     return status;
 }
 
@@ -431,8 +429,8 @@  OvsCtEntryDelete(POVS_CT_ENTRY entry, BOOLEAN forceDelete)
     if (entry == NULL) {
         return;
     }
-    LOCK_STATE_EX lockState;
-    NdisAcquireRWLockWrite(entry->lock, &lockState, 0);
+    KIRQL irql = KeGetCurrentIrql();
+    OVS_ACQUIRE_SPIN_LOCK(&(entry->lock), irql);
     if (forceDelete || OvsCtEntryExpired(entry)) {
         if (entry->natInfo.natAction) {
             LOCK_STATE_EX lockStateNat;
@@ -442,13 +440,13 @@  OvsCtEntryDelete(POVS_CT_ENTRY entry, BOOLEAN forceDelete)
         }
         OvsPostCtEventEntry(entry, OVS_EVENT_CT_DELETE);
         RemoveEntryList(&entry->link);
-        NdisReleaseRWLock(entry->lock, &lockState);
-        NdisFreeRWLock(entry->lock);
+        OVS_RELEASE_SPIN_LOCK(&(entry->lock), irql);
+        NdisFreeSpinLock(&(entry->lock));
         OvsFreeMemoryWithTag(entry, OVS_CT_POOL_TAG);
-        InterlockedDecrement((LONG volatile*)&ctTotalEntries);
+        NdisInterlockedDecrement((PLONG)&ctTotalEntries);
         return;
     }
-    NdisReleaseRWLock(entry->lock, &lockState);
+    OVS_RELEASE_SPIN_LOCK(&(entry->lock), irql);
 }
 
 static __inline NDIS_STATUS
@@ -499,7 +497,7 @@  OvsCtLookup(OvsConntrackKeyLookupCtx *ctx)
     POVS_CT_ENTRY entry;
     BOOLEAN reply = FALSE;
     POVS_CT_ENTRY found = NULL;
-    LOCK_STATE_EX lockState, lockStateTable;
+    LOCK_STATE_EX lockStateTable;
     UINT32 bucketIdx;
     /* Reverse NAT must be performed before OvsCtLookup, so here
      * we simply need to flip the src and dst in key and compare
@@ -512,11 +510,14 @@  OvsCtLookup(OvsConntrackKeyLookupCtx *ctx)
     if (!ctTotalEntries) {
         return found;
     }
+
+    KIRQL irql = KeGetCurrentIrql();
     bucketIdx = ctx->hash & CT_HASH_TABLE_MASK;
     NdisAcquireRWLockRead(ovsCtBucketLock[bucketIdx], &lockStateTable, 0);
     LIST_FORALL(&ovsConntrackTable[bucketIdx], link) {
         entry = CONTAINING_RECORD(link, OVS_CT_ENTRY, link);
-        NdisAcquireRWLockRead(entry->lock, &lockState, 0);
+        OVS_ACQUIRE_SPIN_LOCK(&(entry->lock), irql);
+
         if (OvsCtKeyAreSame(ctx->key, entry->key)) {
             found = entry;
             reply = FALSE;
@@ -533,10 +534,10 @@  OvsCtLookup(OvsConntrackKeyLookupCtx *ctx)
             } else {
                 ctx->reply = reply;
             }
-            NdisReleaseRWLock(entry->lock, &lockState);
+            OVS_RELEASE_SPIN_LOCK(&(entry->lock), irql);
             break;
         }
-        NdisReleaseRWLock(entry->lock, &lockState);
+        OVS_RELEASE_SPIN_LOCK(&(entry->lock), irql);
     }
 
     NdisReleaseRWLock(ovsCtBucketLock[bucketIdx], &lockStateTable);
@@ -689,7 +690,7 @@  OvsProcessConntrackEntry(OvsForwardingContext *fwdCtx,
     POVS_CT_ENTRY entry = ctx->entry;
     UINT32 state = 0;
     PNET_BUFFER_LIST curNbl = fwdCtx->curNbl;
-    LOCK_STATE_EX lockState, lockStateTable;
+    LOCK_STATE_EX lockStateTable;
     PNDIS_RW_LOCK_EX bucketLockRef = NULL;
     *entryCreated = FALSE;
 
@@ -730,7 +731,8 @@  OvsProcessConntrackEntry(OvsForwardingContext *fwdCtx,
         }
     }
     if (entry) {
-        NdisAcquireRWLockRead(entry->lock, &lockState, 0);
+        KIRQL irql = KeGetCurrentIrql();
+        OVS_ACQUIRE_SPIN_LOCK(&(entry->lock), irql);
         if (key->ipKey.nwProto == IPPROTO_TCP) {
             /* Update the related bit if there is a parent */
             if (entry->parent) {
@@ -748,7 +750,7 @@  OvsProcessConntrackEntry(OvsForwardingContext *fwdCtx,
         /* Copy mark and label from entry into flowKey. If actions specify
            different mark and label, update the flowKey. */
         OvsCtUpdateFlowKey(key, state, zone, entry->mark, &entry->labels);
-        NdisReleaseRWLock(entry->lock, &lockState);
+        OVS_RELEASE_SPIN_LOCK(&(entry->lock), irql);
     } else {
         OvsCtUpdateFlowKey(key, state, zone, 0, NULL);
     }
@@ -802,8 +804,6 @@  OvsCtSetMarkLabel(OvsFlowKey *key,
                        MD_LABELS *labels,
                        BOOLEAN *triggerUpdateEvent)
 {
-    LOCK_STATE_EX lockState;
-    NdisAcquireRWLockWrite(entry->lock, &lockState, 0);
     if (mark) {
         OvsConntrackSetMark(key, entry, mark->value, mark->mask,
                             triggerUpdateEvent);
@@ -813,7 +813,6 @@  OvsCtSetMarkLabel(OvsFlowKey *key,
         OvsConntrackSetLabels(key, entry, &labels->value, &labels->mask,
                               triggerUpdateEvent);
     }
-    NdisReleaseRWLock(entry->lock, &lockState);
 }
 
 /*
@@ -854,10 +853,11 @@  OvsCtExecute_(OvsForwardingContext *fwdCtx,
 {
     NDIS_STATUS status = NDIS_STATUS_SUCCESS;
     BOOLEAN triggerUpdateEvent = FALSE;
+    BOOLEAN entryCreated = FALSE;
     POVS_CT_ENTRY entry = NULL;
     PNET_BUFFER_LIST curNbl = fwdCtx->curNbl;
     OvsConntrackKeyLookupCtx ctx = { 0 };
-    LOCK_STATE_EX lockState, lockStateTable;
+    LOCK_STATE_EX lockStateTable;
     UINT64 currentTime;
     NdisGetCurrentSystemTime((LARGE_INTEGER *) &currentTime);
 
@@ -866,10 +866,9 @@  OvsCtExecute_(OvsForwardingContext *fwdCtx,
 
     /* Lookup Conntrack entries for a matching entry */
     entry = OvsCtLookup(&ctx);
-    BOOLEAN entryCreated = FALSE;
 
     /* Delete entry in reverse direction if 'force' is specified */
-    if (entry && force && ctx.reply) {
+    if (force && ctx.reply && entry) {
         PNDIS_RW_LOCK_EX bucketLockRef = entry->bucketLockRef;
         NdisAcquireRWLockWrite(bucketLockRef, &lockStateTable, 0);
         OvsCtEntryDelete(entry, TRUE);
@@ -877,34 +876,33 @@  OvsCtExecute_(OvsForwardingContext *fwdCtx,
         entry = NULL;
     }
 
-    if (!entry && commit && ctTotalEntries >= CT_MAX_ENTRIES) {
-        /* Don't proceed with processing if the max limit has been hit.
-         * This blocks only new entries from being created and doesn't
-         * affect existing connections.
+    if (entry) {
+        /* Increment stats for the entry if it wasn't tracked previously or
+         * if they are on different zones
          */
-        OVS_LOG_ERROR("Conntrack Limit hit: %lu", ctTotalEntries);
-        return NDIS_STATUS_RESOURCES;
-    }
-
-    /* Increment stats for the entry if it wasn't tracked previously or
-     * if they are on different zones
-     */
-    if (entry && (entry->key.zone != key->ct.zone ||
-           (!(key->ct.state & OVS_CS_F_TRACKED)))) {
-        OvsCtIncrementCounters(entry, ctx.reply, curNbl);
-    }
+        if ((entry->key.zone != key->ct.zone ||
+               (!(key->ct.state & OVS_CS_F_TRACKED)))) {
+            OvsCtIncrementCounters(entry, ctx.reply, curNbl);
+        }
+        /* Process the entry and update CT flags */
+        entry = OvsProcessConntrackEntry(fwdCtx, layers->l4Offset, &ctx, key,
+                                         zone, natInfo, commit, currentTime,
+                                         &entryCreated);
 
-    if (!entry) {
+    } else {
+        if (commit && ctTotalEntries >= CT_MAX_ENTRIES) {
+            /* Don't proceed with processing if the max limit has been hit.
+             * This blocks only new entries from being created and doesn't
+             * affect existing connections.
+             */
+            OVS_LOG_ERROR("Conntrack Limit hit: %lu", ctTotalEntries);
+            return NDIS_STATUS_RESOURCES;
+        }
         /* If no matching entry was found, create one and add New state */
         entry = OvsCtEntryCreate(fwdCtx, key->ipKey.nwProto,
                                  layers->l4Offset, &ctx,
                                  key, natInfo, commit, currentTime,
                                  &entryCreated);
-    } else {
-        /* Process the entry and update CT flags */
-        entry = OvsProcessConntrackEntry(fwdCtx, layers->l4Offset, &ctx, key,
-                                         zone, natInfo, commit, currentTime,
-                                         &entryCreated);
     }
 
     if (entry == NULL) {
@@ -918,6 +916,8 @@  OvsCtExecute_(OvsForwardingContext *fwdCtx,
      * NAT_ACTION_REVERSE, yet entry->natInfo is NAT_ACTION_SRC or
      * NAT_ACTION_DST without NAT_ACTION_REVERSE
      */
+    KIRQL irql = KeGetCurrentIrql();
+    OVS_ACQUIRE_SPIN_LOCK(&(entry->lock), irql);
     if (natInfo->natAction != NAT_ACTION_NONE)
     {
         LOCK_STATE_EX lockStateNat;
@@ -939,15 +939,14 @@  OvsCtExecute_(OvsForwardingContext *fwdCtx,
             OVS_LOG_ERROR("Error while parsing the FTP packet");
         }
     }
-    NdisAcquireRWLockRead(entry->lock, &lockState, 0);
+
     /* Add original tuple information to flow Key */
     if (entry->key.dl_type == ntohs(ETH_TYPE_IPV4)) {
         if (entry->parent != NULL) {
             POVS_CT_ENTRY parent = entry->parent;
-            LOCK_STATE_EX lockStateParent;
-            NdisAcquireRWLockRead(parent->lock, &lockStateParent, 0);
+            OVS_ACQUIRE_SPIN_LOCK(&(parent->lock), irql);
             OvsCtUpdateTuple(key, &parent->key);
-            NdisReleaseRWLock(parent->lock, &lockStateParent);
+            OVS_RELEASE_SPIN_LOCK(&(parent->lock), irql);
         } else {
             OvsCtUpdateTuple(key, &entry->key);
         }
@@ -955,13 +954,11 @@  OvsCtExecute_(OvsForwardingContext *fwdCtx,
 
     if (entryCreated) {
         OvsPostCtEventEntry(entry, OVS_EVENT_CT_NEW);
-    }
-    if (postUpdateEvent && !entryCreated && triggerUpdateEvent) {
+    } else if (postUpdateEvent && triggerUpdateEvent) {
         OvsPostCtEventEntry(entry, OVS_EVENT_CT_UPDATE);
     }
 
-    NdisReleaseRWLock(entry->lock, &lockState);
-
+    OVS_RELEASE_SPIN_LOCK(&(entry->lock), irql);
     return status;
 }
 
@@ -1738,8 +1735,9 @@  OvsCtDumpCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
     UINT32 inIndex = instance->dumpState.index[1];
     UINT32 i = CT_HASH_TABLE_SIZE;
     UINT32 outIndex = 0;
+    KIRQL irql = KeGetCurrentIrql();
+    LOCK_STATE_EX lockStateTable;
 
-    LOCK_STATE_EX lockState, lockStateTable;
     if (ctTotalEntries) {
         for (i = inBucket; i < CT_HASH_TABLE_SIZE; i++) {
             PLIST_ENTRY head, link;
@@ -1756,7 +1754,7 @@  OvsCtDumpCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
                  */
                 if (outIndex >= inIndex) {
                     entry = CONTAINING_RECORD(link, OVS_CT_ENTRY, link);
-                    NdisAcquireRWLockRead(entry->lock, &lockState, 0);
+                    OVS_ACQUIRE_SPIN_LOCK(&(entry->lock), irql);
                     rc = OvsCreateNlMsgFromCtEntry(entry,
                                                    usrParamsCtx->outputBuffer,
                                                    usrParamsCtx->outputLength,
@@ -1765,7 +1763,7 @@  OvsCtDumpCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
                                                    msgIn->nlMsg.nlmsgPid,
                                                    msgIn->nfGenMsg.version,
                                                    0);
-                    NdisReleaseRWLock(entry->lock, &lockState);
+                    OVS_RELEASE_SPIN_LOCK(&(entry->lock), irql);
                     if (rc != NDIS_STATUS_SUCCESS) {
                         NdisReleaseRWLock(ovsCtBucketLock[i], &lockStateTable);
                         return STATUS_UNSUCCESSFUL;
diff --git a/datapath-windows/ovsext/Conntrack.h b/datapath-windows/ovsext/Conntrack.h
index 3be309e..7dc92a1 100644
--- a/datapath-windows/ovsext/Conntrack.h
+++ b/datapath-windows/ovsext/Conntrack.h
@@ -101,7 +101,7 @@  typedef struct _NAT_ACTION_INFO {
 typedef struct OVS_CT_ENTRY {
     /* Reference to ovsCtBucketLock of ovsConntrackTable.*/
     PNDIS_RW_LOCK_EX bucketLockRef;
-    PNDIS_RW_LOCK_EX lock;       /* Protects OVS_CT_ENTRY. */
+    NDIS_SPIN_LOCK lock;       /* Protects OVS_CT_ENTRY. */
     OVS_CT_KEY  key;
     OVS_CT_KEY  rev_key;
     UINT64      expiration;
diff --git a/datapath-windows/ovsext/Util.h b/datapath-windows/ovsext/Util.h
index 6f02147..a9bccf3 100644
--- a/datapath-windows/ovsext/Util.h
+++ b/datapath-windows/ovsext/Util.h
@@ -105,6 +105,24 @@  VOID OvsAppendList(PLIST_ENTRY dst, PLIST_ENTRY src);
 #define BIT16(_x)                       ((UINT16)0x1 << (_x))
 #define BIT32(_x)                       ((UINT32)0x1 << (_x))
 
+#define OVS_ACQUIRE_SPIN_LOCK(_pLock, _dispatchLevel)                        \
+{                                                                            \
+    if (_dispatchLevel) {                                                    \
+        NdisDprAcquireSpinLock(_pLock);                                      \
+    } else {                                                                 \
+        NdisAcquireSpinLock(_pLock);                                         \
+    }                                                                        \
+}
+
+#define OVS_RELEASE_SPIN_LOCK(_pLock, _dispatchLevel)                        \
+{                                                                            \
+    if (_dispatchLevel) {                                                    \
+        NdisDprReleaseSpinLock(_pLock);                                      \
+    } else {                                                                 \
+        NdisReleaseSpinLock(_pLock);                                         \
+    }                                                                        \
+}
+
 BOOLEAN OvsCompareString(PVOID string1, PVOID string2);
 
 /*