diff mbox

[ovs-dev,2/3,v2] datapath-windows: Add NAT module in conntrack

Message ID CY1PR0501MB13374EB61E056541ED2F8BA4D43C0@CY1PR0501MB1337.namprd05.prod.outlook.com
State Superseded
Headers show

Commit Message

Yin Lin March 22, 2017, 10:11 p.m. UTC
Signed-off-by: Yin Lin <linyi@vmware.com>

Issue: #
Change-Id: I6f37360c36525548b343f0016304015fec8aba7d
---
 datapath-windows/automake.mk            |   2 +
 datapath-windows/ovsext/Conntrack-nat.c | 437 ++++++++++++++++++++++++++++++++
 datapath-windows/ovsext/Conntrack-nat.h |  39 +++
 3 files changed, 478 insertions(+)
 create mode 100644 datapath-windows/ovsext/Conntrack-nat.c
 create mode 100644 datapath-windows/ovsext/Conntrack-nat.h

Comments

Anand Kumar March 23, 2017, 7:01 p.m. UTC | #1
Hi Yin,

Thank you for the patches. The patch looks good; I only have a few comments.
Please find my comments inline prefixed with [AK]

Regards,
Anand Kumar

On 3/22/17, 3:11 PM, "ovs-dev-bounces@openvswitch.org on behalf of Yin Lin" <ovs-dev-bounces@openvswitch.org on behalf of linyi@vmware.com> wrote:

    Signed-off-by: Yin Lin <linyi@vmware.com>

    
    Issue: #
    Change-Id: I6f37360c36525548b343f0016304015fec8aba7d
    ---
     datapath-windows/automake.mk            |   2 +
     datapath-windows/ovsext/Conntrack-nat.c | 437 ++++++++++++++++++++++++++++++++
     datapath-windows/ovsext/Conntrack-nat.h |  39 +++
     3 files changed, 478 insertions(+)
     create mode 100644 datapath-windows/ovsext/Conntrack-nat.c
     create mode 100644 datapath-windows/ovsext/Conntrack-nat.h
    
    diff --git a/datapath-windows/automake.mk b/datapath-windows/automake.mk
    index 53983ae..296e785 100644
    --- a/datapath-windows/automake.mk
    +++ b/datapath-windows/automake.mk
    @@ -16,7 +16,9 @@ EXTRA_DIST += \
     	datapath-windows/ovsext/Conntrack-icmp.c \
     	datapath-windows/ovsext/Conntrack-other.c \
     	datapath-windows/ovsext/Conntrack-related.c \
    +    datapath-windows/ovsext/Conntrack-nat.c \
     	datapath-windows/ovsext/Conntrack-tcp.c \
    +    datapath-windows/ovsext/Conntrack-nat.h \
     	datapath-windows/ovsext/Conntrack.c \
     	datapath-windows/ovsext/Conntrack.h \
     	datapath-windows/ovsext/Datapath.c \
    diff --git a/datapath-windows/ovsext/Conntrack-nat.c b/datapath-windows/ovsext/Conntrack-nat.c
    new file mode 100644
    index 0000000..4930694
    --- /dev/null
    +++ b/datapath-windows/ovsext/Conntrack-nat.c
    @@ -0,0 +1,437 @@
    +#include "Conntrack-nat.h"
    +#include "Jhash.h"
    +
    +PLIST_ENTRY ovsNatTable = NULL;    /* Buckets holding OVS_NAT_ENTRY.link, selected by hashing the entry's key. */
    +PLIST_ENTRY ovsUnNatTable = NULL;  /* Buckets holding OVS_NAT_ENTRY.reverseLink, selected by hashing the entry's value. */
    +static PNDIS_RW_LOCK_EX ovsNatLock;  /* Allocated in OvsNatInit; taken for write in OvsNatFlush. */
    +
    +/*
    + *---------------------------------------------------------------------------
    + * OvsHashNatKey
    + *     Computes the hash of the NAT relevant part of a Conntrack key. The
    + *     IPv4 source address, IPv4 destination address, source port,
    + *     destination port and zone are folded into the hash in that order,
    + *     starting from a basis of 0.
    + *
    + *     XXX: Only the IPv4 view of the addresses is hashed.
    + *---------------------------------------------------------------------------
    + */
    +static __inline UINT32
    +OvsHashNatKey(const OVS_CT_KEY *key)
    +{
    +    UINT32 result;
    +
    +    result = OvsJhashBytes(&key->src.addr.ipv4_aligned,
    +                           sizeof(key->src.addr.ipv4_aligned), 0);
    +    result = OvsJhashBytes(&key->dst.addr.ipv4_aligned,
    +                           sizeof(key->dst.addr.ipv4_aligned), result);
    +    result = OvsJhashBytes(&key->src.port, sizeof(key->src.port), result);
    +    result = OvsJhashBytes(&key->dst.port, sizeof(key->dst.port), result);
    +    result = OvsJhashBytes(&key->zone, sizeof(key->zone), result);
    +
    +    return result;
    +}
    +
    +/*
    + *---------------------------------------------------------------------------
    + * OvsNatKeyAreSame
    + *     Returns TRUE when two Conntrack keys agree on every NAT relevant
    + *     field (IPv4 source/destination address, source/destination port and
    + *     zone), FALSE as soon as one of them differs.
    + *
    + *     XXX: Compare IPv6 key as well
    + *---------------------------------------------------------------------------
    + */
    +static __inline BOOLEAN
    +OvsNatKeyAreSame(const OVS_CT_KEY *key1, const OVS_CT_KEY *key2)
    +{
    +    if (key1->src.addr.ipv4_aligned == key2->src.addr.ipv4_aligned &&
    +        key1->dst.addr.ipv4_aligned == key2->dst.addr.ipv4_aligned &&
    +        key1->src.port == key2->src.port &&
    +        key1->dst.port == key2->dst.port &&
    +        key1->zone == key2->zone) {
    +        return TRUE;
    +    }
    +    return FALSE;
    +}
    +
    +/*
    + *---------------------------------------------------------------------------
    + * OvsNatGetBucket
    + *     Returns the list head of the bucket that the given key hashes to.
    + *     The bucket is taken from the reverse NAT table when isReverse is
    + *     TRUE and from the NAT table otherwise.
    + *---------------------------------------------------------------------------
    + */
    +static __inline PLIST_ENTRY
    +OvsNatGetBucket(const OVS_CT_KEY *key, BOOLEAN isReverse)
    +{
    +    PLIST_ENTRY table = isReverse ? ovsUnNatTable : ovsNatTable;
    +    uint32_t row = OvsHashNatKey(key) & NAT_HASH_TABLE_MASK;
    +
    +    return &table[row];
    +}
    +
    +/*
    + *---------------------------------------------------------------------------
    + * OvsNatInit
    + *     Allocates the NAT lock and both NAT hash tables and initializes every
    + *     bucket as an empty list.
    + *
    + *     Returns STATUS_SUCCESS on success. Returns
    + *     STATUS_INSUFFICIENT_RESOURCES if any allocation fails; in that case
    + *     everything allocated so far is released and the module globals are
    + *     reset to NULL, so that a later OvsNatCleanup or a retried OvsNatInit
    + *     never sees a dangling pointer.
    + *---------------------------------------------------------------------------
    + */
    +NTSTATUS OvsNatInit(POVS_SWITCH_CONTEXT context)
    +{
    +    ASSERT(ovsNatTable == NULL);
    +
    +    /* Init the sync-lock */
    +    ovsNatLock = NdisAllocateRWLock(context->NdisFilterHandle);
    +    if (ovsNatLock == NULL) {
    +        goto failNoMem;
    +    }
    +
    +    /* Init the Hash Buffer */
    +    ovsNatTable = OvsAllocateMemoryWithTag(
    +        sizeof(LIST_ENTRY) * NAT_HASH_TABLE_SIZE,
    +        OVS_CT_POOL_TAG);
    +    if (ovsNatTable == NULL) {
    +        goto freeNatLock;
    +    }
    +
    +    ovsUnNatTable = OvsAllocateMemoryWithTag(
    +        sizeof(LIST_ENTRY) * NAT_HASH_TABLE_SIZE,
    +        OVS_CT_POOL_TAG);
    +    if (ovsUnNatTable == NULL) {
    +        goto freeNatTable;
    +    }
    +
    +    /* NAT_HASH_TABLE_SIZE is a UINT32, so use an unsigned index. */
    +    for (UINT32 i = 0; i < NAT_HASH_TABLE_SIZE; i++) {
    +        InitializeListHead(&ovsNatTable[i]);
    +        InitializeListHead(&ovsUnNatTable[i]);
    +    }
    +    return STATUS_SUCCESS;
    +
    +freeNatTable:
    +    OvsFreeMemoryWithTag(ovsNatTable, OVS_CT_POOL_TAG);
    +    /* OvsNatCleanup keys off this pointer; do not leave it dangling. */
    +    ovsNatTable = NULL;
    +freeNatLock:
    +    NdisFreeRWLock(ovsNatLock);
    +    ovsNatLock = NULL;
    +failNoMem:
    +    return STATUS_INSUFFICIENT_RESOURCES;
    +}
    +
    +/*
    + *----------------------------------------------------------------------------
    + * OvsNatFlush
    + *     Deletes NAT entries while holding the NAT lock for write. A zone of
    + *     0 deletes every entry; any other value deletes only the entries
    + *     whose key carries that zone.
    + *----------------------------------------------------------------------------
    + */
    +VOID OvsNatFlush(UINT16 zone)
    +{
    +    LOCK_STATE_EX lockState;
    +    PLIST_ENTRY cur, following;
    +
    +    NdisAcquireRWLockWrite(ovsNatLock, &lockState, 0);
    +    for (int row = 0; row < NAT_HASH_TABLE_SIZE; row++) {
    +        LIST_FORALL_SAFE(&ovsNatTable[row], cur, following) {
    +            POVS_NAT_ENTRY natEntry =
    +                CONTAINING_RECORD(cur, OVS_NAT_ENTRY, link);
    +
    +            /* Zone 0 acts as a wildcard that matches all entries. */
    +            if (zone == 0 || natEntry->key.zone == zone) {
    +                OvsNatDeleteEntry(natEntry);
    +            }
    +        }
    +    }
    +    NdisReleaseRWLock(ovsNatLock, &lockState);
    +}
    +
    +/*
    + *----------------------------------------------------------------------------
    + * OvsNatCleanup
    + *     Frees both NAT hash tables and the NAT lock and resets the module
    + *     globals to NULL. Does nothing when the NAT table was never allocated.
    + *     Entries still linked in the tables are not freed here.
    + *----------------------------------------------------------------------------
    + */
    +VOID OvsNatCleanup()
    +{
    +    if (ovsNatTable == NULL) {
    +        return;
    +    }
    +
    +    OvsFreeMemoryWithTag(ovsNatTable, OVS_CT_POOL_TAG);
    +    ovsNatTable = NULL;
    +
    +    OvsFreeMemoryWithTag(ovsUnNatTable, OVS_CT_POOL_TAG);
    +    ovsUnNatTable = NULL;
    +
    +    NdisFreeRWLock(ovsNatLock);
    +    ovsNatLock = NULL;
    +}
    +
    +/*
    + *----------------------------------------------------------------------------
    + * OvsNatPacket
    + *     Performs NAT operation on the packet by replacing the
    + *     source/destination address/port based on natAction. If reverse is
    + *     TRUE, perform unNAT instead.
    + *
    + *     Does nothing when natAction has neither NAT_ACTION_SRC nor
    + *     NAT_ACTION_DST set. Otherwise the matching NAT state flag is set in
    + *     key->ct.state, and for IPv4 the packet and the flow key are
    + *     rewritten. The port in the flow key is only rewritten when natAction
    + *     carries a port flag and the current port is non-zero.
    + *----------------------------------------------------------------------------
    + */
    +VOID
    +OvsNatPacket(OvsForwardingContext *ovsFwdCtx,
    +             const OVS_CT_ENTRY *entry,
    +             UINT16 natAction,
    +             OvsFlowKey *key,
    +             BOOLEAN reverse)
    +{
    +    const OVS_CT_KEY *ctKey;
    +    const struct ct_endpoint *endpoint;
    +    BOOLEAN isSrcNat;
    +
    +    if (!(natAction & (NAT_ACTION_SRC | NAT_ACTION_DST))) {
    +        return;
    +    }
    +
    +    /* When it is NAT, only entry->rev_key contains NATTED address;
    +       When it is unNAT, only entry->key contains the UNNATTED address; */
    +    ctKey = reverse ? &entry->key : &entry->rev_key;
    +
    +    /* The source is rewritten for SNAT and for the reverse DNAT case. */
    +    isSrcNat = reverse ? ((natAction & NAT_ACTION_DST) != 0)
    +                       : ((natAction & NAT_ACTION_SRC) != 0);
    +
    +    /* Note that ctKey is the key in the other direction, so the endpoint
    +       has to be reverted, i.e. ctKey->dst when rewriting the source and
    +       ctKey->src when rewriting the destination. */
    +    if (isSrcNat) {
    +        endpoint = &ctKey->dst;
    +        key->ct.state |= OVS_CS_F_SRC_NAT;
    +    } else {
    +        endpoint = &ctKey->src;
    +        key->ct.state |= OVS_CS_F_DST_NAT;
    +    }
    +
    +    if (ctKey->dl_type == htons(ETH_TYPE_IPV4)) {
    +        OvsUpdateAddressAndPort(ovsFwdCtx,
    +                                endpoint->addr.ipv4_aligned,
    +                                endpoint->port, isSrcNat);
    +        if (isSrcNat) {
    +            key->ipKey.nwSrc = endpoint->addr.ipv4_aligned;
    +        } else {
    +            key->ipKey.nwDst = endpoint->addr.ipv4_aligned;
    +        }
    +    } else if (ctKey->dl_type == htons(ETH_TYPE_IPV6)) {
    +        // XXX: IPv6 packet not supported yet.
    +        return;
    +    }
    +
    +    if (!(natAction & (NAT_ACTION_SRC_PORT | NAT_ACTION_DST_PORT))) {
    +        return;
    +    }
    +
    +    /* A port of 0 in the flow key is left untouched. */
    +    if (isSrcNat) {
    +        if (key->ipKey.l4.tpSrc != 0) {
    +            key->ipKey.l4.tpSrc = endpoint->port;
    +        }
    +    } else {
    +        if (key->ipKey.l4.tpDst != 0) {
    +            key->ipKey.l4.tpDst = endpoint->port;
    +        }
    +    }
    +}
    +
    +
    +/*
    + *----------------------------------------------------------------------------
    + * OvsNatHashRange
    + *     Computes a hash, seeded with basis, over the NAT address range of a
    + *     Conntrack entry (natInfo.minAddr, natInfo.maxAddr) followed by the
    + *     dl_type, nw_proto and zone of its key.
    + *----------------------------------------------------------------------------
    + */
    +static UINT32 OvsNatHashRange(const OVS_CT_ENTRY *entry, UINT32 basis)
    +{
    +    UINT32 result = basis;
    +
    +    result = OvsJhashBytes(&entry->natInfo.minAddr,
    +                           sizeof(entry->natInfo.minAddr), result);
    +    result = OvsJhashBytes(&entry->natInfo.maxAddr,
    +                           sizeof(entry->natInfo.maxAddr), result);
    +    result = OvsJhashBytes(&entry->key.dl_type,
    +                           sizeof(entry->key.dl_type), result);
    +    result = OvsJhashBytes(&entry->key.nw_proto,
    +                           sizeof(entry->key.nw_proto), result);
    +    result = OvsJhashBytes(&entry->key.zone,
    +                           sizeof(entry->key.zone), result);
    +
    +    return result;
    +}
    +
    +/*
    + *----------------------------------------------------------------------------
    + * OvsNatAddEntry
    + *     Links a NAT entry at the head of two buckets: the NAT table bucket
    + *     selected by entry->key and the reverse NAT table bucket selected by
    + *     entry->value.
    + *----------------------------------------------------------------------------
    + */
    +VOID
    +OvsNatAddEntry(OVS_NAT_ENTRY* entry)
    +{
    +    PLIST_ENTRY forwardBucket = OvsNatGetBucket(&entry->key, FALSE);
    +    PLIST_ENTRY reverseBucket = OvsNatGetBucket(&entry->value, TRUE);
    +
    +    InsertHeadList(forwardBucket, &entry->link);
    +    InsertHeadList(reverseBucket, &entry->reverseLink);
    +}
    +
    +/*
    + *----------------------------------------------------------------------------
    + * OvsNatCtEntry
    + *     Update a Conntrack entry with NAT information. A translated address
    + *     and port are chosen from the ranges in entry->natInfo and written
    + *     back to entry->rev_key, and a matching NAT entry is added to the NAT
    + *     tables.
    + *
    + *     The search starts at an address/port derived from a hash of the NAT
    + *     range. For each address, every port of the current port range is
    + *     tried; then the next address is tried, wrapping from the maximum to
    + *     the minimum address. Once all addresses were tried, the search is
    + *     repeated one more time with the port range 1024-65535.
    + *
    + *     Returns TRUE when a free translation was found and recorded. Returns
    + *     FALSE when the entry is not IPv4, when memory for the NAT entry
    + *     cannot be allocated, or when every candidate is already in use. Note
    + *     that entry->rev_key holds the last candidate tried when FALSE is
    + *     returned from inside the search.
    + *----------------------------------------------------------------------------
    + */
    +BOOLEAN
    +OvsNatCtEntry(OVS_CT_ENTRY *entry)
    +{
    +    const uint16_t MIN_NAT_EPHEMERAL_PORT = 1024;
    +    const uint16_t MAX_NAT_EPHEMERAL_PORT = 65535;
    +
    +    uint16_t minPort;
    +    uint16_t maxPort;
    +    uint16_t firstPort;
    +
    +    uint32_t hash = OvsNatHashRange(entry, 0);
    +
    +    if ((entry->natInfo.natAction & NAT_ACTION_SRC) &&
    +        (!(entry->natInfo.natAction & NAT_ACTION_SRC_PORT))) {
    +        /* SNAT without port translation: only the original port is tried. */
    +        firstPort = minPort = maxPort = ntohs(entry->key.src.port);
    +    } else if ((entry->natInfo.natAction & NAT_ACTION_DST) &&
    +               (!(entry->natInfo.natAction & NAT_ACTION_DST_PORT))) {
    +        /* DNAT without port translation: only the original port is tried. */
    +        firstPort = minPort = maxPort = ntohs(entry->key.dst.port);
    +    } else {
    +        /* Start at a hash selected port inside the configured range. */
    +        uint16_t portDelta = entry->natInfo.maxPort - entry->natInfo.minPort;
    +        uint16_t portIndex = (uint16_t) hash % (portDelta + 1);
    +        firstPort = entry->natInfo.minPort + portIndex;
    +        minPort = entry->natInfo.minPort;
    +        maxPort = entry->natInfo.maxPort;
    +    }
    +
    +    uint32_t addrDelta = 0;
    +    uint32_t addrCount;
    +    uint32_t addrIndex;
    +    struct ct_addr ctAddr, maxCtAddr;
    +    memset(&ctAddr, 0, sizeof ctAddr);
    +    memset(&maxCtAddr, 0, sizeof maxCtAddr);
    +    maxCtAddr = entry->natInfo.maxAddr;
    +
    +    if (entry->key.dl_type == htons(ETH_TYPE_IPV4)) {
    +        addrDelta = ntohl(entry->natInfo.maxAddr.ipv4_aligned) -
    +                    ntohl(entry->natInfo.minAddr.ipv4_aligned);
    +        /* addrDelta + 1 wraps to 0 when the range covers the whole 32-bit
    +           address space. Every hash value is a valid index in that case,
    +           so skip the modulo instead of dividing by zero. */
    +        addrCount = addrDelta + 1;
    +        addrIndex = (addrCount != 0) ? (hash % addrCount) : hash;
    +        ctAddr.ipv4_aligned = htonl(
    +            ntohl(entry->natInfo.minAddr.ipv4_aligned) + addrIndex);
    +    } else {
    +        // XXX: IPv6 not supported
    +        return FALSE;
    +    }
    +
    +    uint16_t port = firstPort;
    +    BOOLEAN allPortsTried = FALSE;
    +    BOOLEAN originalPortsTried = FALSE;
    +    struct ct_addr firstAddr = ctAddr;
    +    for (;;) {
    +        /* Write the current candidate into the reverse key. */
    +        if (entry->natInfo.natAction & NAT_ACTION_SRC) {
    +            entry->rev_key.dst.addr = ctAddr;
    +            entry->rev_key.dst.port = htons(port);
    +        } else {
    +            entry->rev_key.src.addr = ctAddr;
    +            entry->rev_key.src.port = htons(port);
    +        }
    +
    +        OVS_NAT_ENTRY *natEntry = OvsNatLookup(&entry->rev_key, TRUE);
    +
    +        if (!natEntry) {
    +            /* The candidate is free: record the mapping. */
    +            natEntry = OvsAllocateMemoryWithTag(sizeof(*natEntry),
    +                                                OVS_CT_POOL_TAG);
    +            if (natEntry == NULL) {
    +                /* Out of memory: report failure instead of writing
    +                   through a NULL pointer. */
    +                return FALSE;
    +            }
    +            memcpy(&natEntry->key, &entry->key,
    +                   sizeof natEntry->key);
    +            memcpy(&natEntry->value, &entry->rev_key,
    +                   sizeof natEntry->value);
    +            natEntry->ctEntry = entry;
    +            OvsNatAddEntry(natEntry);
    +            return TRUE;
    +        } else if (!allPortsTried) {
    +            /* Candidate in use: advance to the next port, wrapping from
    +               maxPort to minPort, until firstPort is reached again. */
    +            if (minPort == maxPort) {
    +                allPortsTried = TRUE;
    +            } else if (port == maxPort) {
    +                port = minPort;
    +            } else {
    +                port++;
    +            }
    +            if (port == firstPort) {
    +                allPortsTried = TRUE;
    +            }
    +        } else {
    +            /* All ports of this address are in use: advance to the next
    +               address, wrapping from the maximum to the minimum. */
    +            if (memcmp(&ctAddr, &maxCtAddr, sizeof ctAddr)) {
    +                if (entry->key.dl_type == htons(ETH_TYPE_IPV4)) {
    +                    ctAddr.ipv4_aligned = htonl(
    +                        ntohl(ctAddr.ipv4_aligned) + 1);
    +                } else {
    +                    // XXX: IPv6 not supported
    +                    return FALSE;
    +                }
    +            } else {
    +                ctAddr = entry->natInfo.minAddr;
    +            }
    +            if (!memcmp(&ctAddr, &firstAddr, sizeof ctAddr)) {
    +                if (!originalPortsTried) {
    +                    /* Every address was tried with the requested ports;
    +                       retry once with the ephemeral port range. */
    +                    originalPortsTried = TRUE;
    +                    ctAddr = entry->natInfo.minAddr;
    +                    minPort = MIN_NAT_EPHEMERAL_PORT;
    +                    maxPort = MAX_NAT_EPHEMERAL_PORT;
    +                } else {
    +                    break;
    +                }
    +            }
    +            firstPort = minPort;
    +            port = firstPort;
    +            allPortsTried = FALSE;
    +        }
    +    }
    +    return FALSE;
    +}
    +
    +/*
    + *----------------------------------------------------------------------------
    + * OvsNatLookup
    + *     Searches the bucket that ctKey hashes to for a NAT entry matching
    + *     ctKey. When reverse is FALSE the NAT table is searched and ctKey is
    + *     compared with each entry's key; when reverse is TRUE the reverse NAT
    + *     table is searched and ctKey is compared with each entry's value.
    + *
    + *     Returns the first matching entry, or NULL when there is none.
    + *----------------------------------------------------------------------------
    + */
    +POVS_NAT_ENTRY
    +OvsNatLookup(const OVS_CT_KEY *ctKey, BOOLEAN reverse)
    +{
    +    PLIST_ENTRY bucket = OvsNatGetBucket(ctKey, reverse);
    +    PLIST_ENTRY link;
    +
    +    LIST_FORALL(bucket, link) {
    +        POVS_NAT_ENTRY candidate;
    +        const OVS_CT_KEY *candidateKey;
    +
    +        /* Each table chains the entries through a different list member. */
    +        if (reverse) {
    +            candidate = CONTAINING_RECORD(link, OVS_NAT_ENTRY, reverseLink);
    +            candidateKey = &candidate->value;
    +        } else {
    +            candidate = CONTAINING_RECORD(link, OVS_NAT_ENTRY, link);
    +            candidateKey = &candidate->key;
    +        }
    +
    +        if (OvsNatKeyAreSame(ctKey, candidateKey)) {
    +            return candidate;
    +        }
    +    }
    +    return NULL;
    +}
    +
    +/*
    + *----------------------------------------------------------------------------
    + * OvsNatDeleteEntry
    + *     Unlinks a NAT entry from both of its lists and frees it. A NULL
    + *     entry is ignored.
    + *----------------------------------------------------------------------------
    + */
    +VOID
    +OvsNatDeleteEntry(POVS_NAT_ENTRY entry)
    +{
    +    if (entry != NULL) {
    +        RemoveEntryList(&entry->link);
    +        RemoveEntryList(&entry->reverseLink);
    +        OvsFreeMemoryWithTag(entry, OVS_CT_POOL_TAG);
    +    }
    +}
    +
    +/*
    + *----------------------------------------------------------------------------
    + * OvsNatDeleteKey
    + *     Looks up the NAT entry whose key matches the given key and deletes
    + *     it. Nothing happens when no such entry exists.
    + *----------------------------------------------------------------------------
    + */
    +VOID
    +OvsNatDeleteKey(const OVS_CT_KEY *key)
    +{
    +    POVS_NAT_ENTRY match = OvsNatLookup(key, FALSE);
    +
    +    OvsNatDeleteEntry(match);
    +}
    diff --git a/datapath-windows/ovsext/Conntrack-nat.h b/datapath-windows/ovsext/Conntrack-nat.h
    new file mode 100644
    index 0000000..99f2b67
    --- /dev/null
    +++ b/datapath-windows/ovsext/Conntrack-nat.h
    @@ -0,0 +1,39 @@
    +#ifndef _CONNTRACK_NAT_H
    +#define _CONNTRACK_NAT_H
    +
    +#include "precomp.h"
    +#include "Flow.h"
    +#include "Debug.h"
    +#include <stddef.h>
    +#include "Conntrack.h"
    +
    +#define NAT_HASH_TABLE_SIZE ((UINT32)1 << 10)
    +#define NAT_HASH_TABLE_MASK (NAT_HASH_TABLE_SIZE - 1)
    +
    +typedef struct OVS_NAT_ENTRY {
    +    LIST_ENTRY link;            /* Chains the entry in the NAT table bucket selected by key. */
    +    LIST_ENTRY reverseLink;     /* Chains the entry in the reverse NAT table bucket selected by value. */
    +    OVS_CT_KEY key;             /* Copy of the conntrack entry's key. */
    +    OVS_CT_KEY value;           /* Copy of the conntrack entry's rev_key (the translated side). */
    +    POVS_CT_ENTRY  ctEntry;     /* Conntrack entry this mapping was created for. */
    +} OVS_NAT_ENTRY, *POVS_NAT_ENTRY;
    +
    +__inline static BOOLEAN OvsIsForwardNat(UINT16 natAction) {
[AK] – Is there a need for double inversion here?
    +    return !!(natAction & (NAT_ACTION_SRC | NAT_ACTION_DST));
    +}
    +
    +NTSTATUS OvsNatInit(POVS_SWITCH_CONTEXT);
    +VOID OvsNatFlush(UINT16 zone);
    +
    +VOID OvsNatAddEntry(OVS_NAT_ENTRY* entry);
    +
    +VOID OvsNatDeleteEntry(POVS_NAT_ENTRY entry);
    +VOID OvsNatDeleteKey(const OVS_CT_KEY *key);
    +VOID OvsNatCleanup();
    +
    +POVS_NAT_ENTRY OvsNatLookup(const OVS_CT_KEY *ctKey, BOOLEAN reverse);
    +BOOLEAN OvsNatCtEntry(OVS_CT_ENTRY *ctEntry);
    +VOID OvsNatPacket(OvsForwardingContext *ovsFwdCtx, const OVS_CT_ENTRY *entry,
    +                  UINT16 natAction, OvsFlowKey *key, BOOLEAN reverse);
    +
    +#endif
[AK] – A newline should be added at the end of the file.
    \ No newline at end of file
    -- 
    2.10.2.windows.1
Sairam Venugopal March 24, 2017, 6:58 a.m. UTC | #2
Hi Yin,

Thanks for sending over the patch. While it looks good for the most part, there are certain indentations that need fixing. I will send those out separately.

I see that you have allocated ovsNatLock and have added stubs for flush and cleanup, similar to Conntrack.c.

However, I don’t see you taking out the lock for accessing the NAT tables. How are you verifying thread-safety here?

Thanks,
Sairam




On 3/22/17, 3:11 PM, "ovs-dev-bounces@openvswitch.org on behalf of Yin Lin" <ovs-dev-bounces@openvswitch.org on behalf of linyi@vmware.com> wrote:

>Signed-off-by: Yin Lin <linyi@vmware.com>

>

>Issue: #

>Change-Id: I6f37360c36525548b343f0016304015fec8aba7d

>---

> datapath-windows/automake.mk            |   2 +

> datapath-windows/ovsext/Conntrack-nat.c | 437 ++++++++++++++++++++++++++++++++

> datapath-windows/ovsext/Conntrack-nat.h |  39 +++

> 3 files changed, 478 insertions(+)

> create mode 100644 datapath-windows/ovsext/Conntrack-nat.c

> create mode 100644 datapath-windows/ovsext/Conntrack-nat.h

>

>diff --git a/datapath-windows/automake.mk b/datapath-windows/automake.mk

>index 53983ae..296e785 100644

>--- a/datapath-windows/automake.mk

>+++ b/datapath-windows/automake.mk

>@@ -16,7 +16,9 @@ EXTRA_DIST += \

> 	datapath-windows/ovsext/Conntrack-icmp.c \

> 	datapath-windows/ovsext/Conntrack-other.c \

> 	datapath-windows/ovsext/Conntrack-related.c \

>+    datapath-windows/ovsext/Conntrack-nat.c \

> 	datapath-windows/ovsext/Conntrack-tcp.c \

>+    datapath-windows/ovsext/Conntrack-nat.h \

> 	datapath-windows/ovsext/Conntrack.c \

> 	datapath-windows/ovsext/Conntrack.h \

> 	datapath-windows/ovsext/Datapath.c \

>diff --git a/datapath-windows/ovsext/Conntrack-nat.c b/datapath-windows/ovsext/Conntrack-nat.c

>new file mode 100644

>index 0000000..4930694

>--- /dev/null

>+++ b/datapath-windows/ovsext/Conntrack-nat.c

>@@ -0,0 +1,437 @@

>+#include "Conntrack-nat.h"

>+#include "Jhash.h"

>+

>+PLIST_ENTRY ovsNatTable = NULL;

>+PLIST_ENTRY ovsUnNatTable = NULL;

>+static PNDIS_RW_LOCK_EX ovsNatLock;

>+

>+/*

>+ *---------------------------------------------------------------------------

>+ * OvsHashNatKey

>+ *     Hash NAT related fields in a Conntrack key.

>+ *---------------------------------------------------------------------------

>+ */

>+static __inline UINT32

>+OvsHashNatKey(const OVS_CT_KEY *key)

>+{

>+    UINT32 hash = 0;

>+#define HASH_ADD(field) \

>+    hash = OvsJhashBytes(&key->field, sizeof(key->field), hash)

>+

>+    HASH_ADD(src.addr.ipv4_aligned);

>+    HASH_ADD(dst.addr.ipv4_aligned);

>+    HASH_ADD(src.port);

>+    HASH_ADD(dst.port);

>+    HASH_ADD(zone);

>+#undef HASH_ADD

>+    return hash;

>+}

>+

>+/*

>+ *---------------------------------------------------------------------------

>+ * OvsNatKeyAreSame

>+ *     Compare NAT related fields in a Conntrack key.

>+ *---------------------------------------------------------------------------

>+ */

>+static __inline BOOLEAN

>+OvsNatKeyAreSame(const OVS_CT_KEY *key1, const OVS_CT_KEY *key2)

>+{

>+    // XXX: Compare IPv6 key as well

>+#define FIELD_COMPARE(field) \

>+    if (key1->field != key2->field) return FALSE

>+

>+    FIELD_COMPARE(src.addr.ipv4_aligned);

>+    FIELD_COMPARE(dst.addr.ipv4_aligned);

>+    FIELD_COMPARE(src.port);

>+    FIELD_COMPARE(dst.port);

>+    FIELD_COMPARE(zone);

>+    return TRUE;

>+#undef FIELD_COMPARE

>+}

>+

>+/*

>+ *---------------------------------------------------------------------------

>+ * OvsNaGetBucket

>+ *     Returns the row of NAT table that has the same hash as the given NAT

>+ *     hash key. If isReverse is TRUE, returns the row of reverse NAT table

>+ *     instead.

>+ *---------------------------------------------------------------------------

>+ */

>+static __inline PLIST_ENTRY

>+OvsNatGetBucket(const OVS_CT_KEY *key, BOOLEAN isReverse)

>+{

>+    uint32_t hash = OvsHashNatKey(key);

>+    if (isReverse) {

>+        return &ovsUnNatTable[hash & NAT_HASH_TABLE_MASK];

>+    } else {

>+        return &ovsNatTable[hash & NAT_HASH_TABLE_MASK];

>+    }

>+}

>+

>+/*

>+ *---------------------------------------------------------------------------

>+ * OvsNatInit

>+ *     Initialize NAT related resources.

>+ *---------------------------------------------------------------------------

>+ */

>+NTSTATUS OvsNatInit(POVS_SWITCH_CONTEXT context)

>+{

>+    ASSERT(ovsNatTable == NULL);

>+

>+    /* Init the sync-lock */

>+    ovsNatLock = NdisAllocateRWLock(context->NdisFilterHandle);

>+    if (ovsNatLock == NULL) {

>+        goto failNoMem;

>+    }

>+

>+    /* Init the Hash Buffer */

>+    ovsNatTable = OvsAllocateMemoryWithTag(

>+        sizeof(LIST_ENTRY) * NAT_HASH_TABLE_SIZE,

>+        OVS_CT_POOL_TAG);

>+    if (ovsNatTable == NULL) {

>+        goto freeNatLock;

>+    }

>+

>+    ovsUnNatTable = OvsAllocateMemoryWithTag(

>+        sizeof(LIST_ENTRY) * NAT_HASH_TABLE_SIZE,

>+        OVS_CT_POOL_TAG);

>+    if (ovsUnNatTable == NULL) {

>+        goto freeNatTable;

>+    }

>+

>+    for (int i = 0; i < NAT_HASH_TABLE_SIZE; i++) {

>+        InitializeListHead(&ovsNatTable[i]);

>+        InitializeListHead(&ovsUnNatTable[i]);

>+    }

>+    return STATUS_SUCCESS;

>+

>+freeNatTable:

>+    OvsFreeMemoryWithTag(ovsNatTable, OVS_CT_POOL_TAG);

>+freeNatLock:

>+    NdisFreeRWLock(ovsNatLock);

>+failNoMem:

>+    return STATUS_INSUFFICIENT_RESOURCES;

>+}

>+

>+/*

>+ *----------------------------------------------------------------------------

>+ * OvsNatFlush

>+ *     Flushes out all NAT entries that match the given zone.

>+ *----------------------------------------------------------------------------

>+ */

>+VOID OvsNatFlush(UINT16 zone)

>+{

>+    LOCK_STATE_EX lockState;

>+    PLIST_ENTRY link, next;

>+    NdisAcquireRWLockWrite(ovsNatLock, &lockState, 0);

>+    for (int i = 0; i < NAT_HASH_TABLE_SIZE; i++) {

>+        LIST_FORALL_SAFE(&ovsNatTable[i], link, next) {

>+            POVS_NAT_ENTRY entry =

>+                CONTAINING_RECORD(link, OVS_NAT_ENTRY, link);

>+            /* zone is a non-zero value */

>+            if (!zone || zone == entry->key.zone) {

>+                OvsNatDeleteEntry(entry);

>+            }

>+        }

>+    }

>+    NdisReleaseRWLock(ovsNatLock, &lockState);

>+}

>+

>+/*

>+ *----------------------------------------------------------------------------

>+ * OvsNatCleanup

>+ *     Releases all NAT related resources.

>+ *----------------------------------------------------------------------------

>+ */

>+VOID OvsNatCleanup()

>+{

>+    if (ovsNatTable == NULL) return;

>+    OvsFreeMemoryWithTag(ovsNatTable, OVS_CT_POOL_TAG);

>+    OvsFreeMemoryWithTag(ovsUnNatTable, OVS_CT_POOL_TAG);

>+    NdisFreeRWLock(ovsNatLock);

>+    ovsNatTable = NULL;

>+    ovsUnNatTable = NULL;

>+    ovsNatLock = NULL;

>+}

>+

>+/*

>+ *----------------------------------------------------------------------------

>+ * OvsNatPacket

>+ *     Performs NAT operation on the packet by replacing the source/destinaton

>+ *     address/port based on natAction. If reverse is TRUE, perform unNAT

>+ *     instead.

>+ *----------------------------------------------------------------------------

>+ */

>+VOID

>+OvsNatPacket(OvsForwardingContext *ovsFwdCtx,

>+             const OVS_CT_ENTRY *entry,

>+             UINT16 natAction,

>+             OvsFlowKey *key,

>+             BOOLEAN reverse)

>+{

>+    UINT32 natFlag;

>+    const struct ct_endpoint* endpoint;

>+    /* When it is NAT, only entry->rev_key contains NATTED address;

>+       When it is unNAT, only entry->key contains the UNNATTED address;*/

>+    const OVS_CT_KEY *ctKey = reverse ? &entry->key : &entry->rev_key;

>+    BOOLEAN isSrcNat;

>+

>+    if (!(natAction & (NAT_ACTION_SRC | NAT_ACTION_DST))) {

>+        return;

>+    }

>+    isSrcNat = (((natAction & NAT_ACTION_SRC) && !reverse) ||

>+                ((natAction & NAT_ACTION_DST) && reverse));

>+

>+    if (isSrcNat) {

>+        /* Flag is set to SNAT for SNAT case and the reverse DNAT case */

>+        natFlag = OVS_CS_F_SRC_NAT;

>+        /* Note that ctKey is the key in the other direction, so

>+           endpoint has to be reverted, i.e. ctKey->dst for SNAT

>+           and ctKey->src for DNAT */

>+        endpoint = &ctKey->dst;

>+    } else {

>+        natFlag = OVS_CS_F_DST_NAT;

>+        endpoint = &ctKey->src;

>+    }

>+    key->ct.state |= natFlag;

>+    if (ctKey->dl_type == htons(ETH_TYPE_IPV4)) {

>+        OvsUpdateAddressAndPort(ovsFwdCtx,

>+                                endpoint->addr.ipv4_aligned,

>+                                endpoint->port, isSrcNat);

>+        if (isSrcNat) {

>+            key->ipKey.nwSrc = endpoint->addr.ipv4_aligned;

>+        } else {

>+            key->ipKey.nwDst = endpoint->addr.ipv4_aligned;

>+        }

>+    } else if (ctKey->dl_type == htons(ETH_TYPE_IPV6)){

>+        // XXX: IPv6 packet not supported yet.

>+        return;

>+    }

>+    if (natAction & (NAT_ACTION_SRC_PORT | NAT_ACTION_DST_PORT)) {

>+        if (isSrcNat) {

>+            if (key->ipKey.l4.tpSrc != 0) {

>+                key->ipKey.l4.tpSrc = endpoint->port;

>+            }

>+        } else {

>+            if (key->ipKey.l4.tpDst != 0) {

>+                key->ipKey.l4.tpDst = endpoint->port;

>+            }

>+        }

>+    }

>+}

>+

>+

>+/*

>+ *----------------------------------------------------------------------------

>+ * OvsNatHashRange

>+ *     Compute hash for a range of addresses specified in natInfo.

>+ *----------------------------------------------------------------------------

>+ */

>+static UINT32 OvsNatHashRange(const OVS_CT_ENTRY *entry, UINT32 basis)

>+{

>+    UINT32 hash = basis;

>+#define HASH_ADD(field) \

>+    hash = OvsJhashBytes(&field, sizeof(field), hash)

>+

>+    HASH_ADD(entry->natInfo.minAddr);

>+    HASH_ADD(entry->natInfo.maxAddr);

>+    HASH_ADD(entry->key.dl_type);

>+    HASH_ADD(entry->key.nw_proto);

>+    HASH_ADD(entry->key.zone);

>+#undef HASH_ADD

>+    return hash;

>+}

>+

>+/*

>+ *----------------------------------------------------------------------------

>+ * OvsNatAddEntry

>+ *     Add an entry to the NAT table. Also updates the reverse NAT lookup

>+ *     table.

>+ *----------------------------------------------------------------------------

>+ */

>+VOID

>+OvsNatAddEntry(OVS_NAT_ENTRY* entry)

>+{

>+    InsertHeadList(OvsNatGetBucket(&entry->key, FALSE),

>+                   &entry->link);

>+    InsertHeadList(OvsNatGetBucket(&entry->value, TRUE),

>+                   &entry->reverseLink);

>+}

>+

>+/*

>+ *----------------------------------------------------------------------------

>+ * OvsNatCtEntry

>+ *     Update an Conntrack entry with NAT information. Translated address and

>+ *     port will be generated and write back to the conntrack entry as a

>+ *     result.

>+ *----------------------------------------------------------------------------

>+ */

>+BOOLEAN

>+OvsNatCtEntry(OVS_CT_ENTRY *entry)

>+{

>+    const uint16_t MIN_NAT_EPHEMERAL_PORT = 1024;

>+    const uint16_t MAX_NAT_EPHEMERAL_PORT = 65535;

>+

>+    uint16_t minPort;

>+    uint16_t maxPort;

>+    uint16_t firstPort;

>+

>+    uint32_t hash = OvsNatHashRange(entry, 0);

>+

>+    if ((entry->natInfo.natAction & NAT_ACTION_SRC) &&

>+        (!(entry->natInfo.natAction & NAT_ACTION_SRC_PORT))) {

>+        firstPort = minPort = maxPort = ntohs(entry->key.src.port);

>+    } else if ((entry->natInfo.natAction & NAT_ACTION_DST) &&

>+               (!(entry->natInfo.natAction & NAT_ACTION_DST_PORT))) {

>+        firstPort = minPort = maxPort = ntohs(entry->key.dst.port);

>+    } else {

>+        uint16_t portDelta = entry->natInfo.maxPort - entry->natInfo.minPort;

>+        uint16_t portIndex = (uint16_t) hash % (portDelta + 1);

>+        firstPort = entry->natInfo.minPort + portIndex;

>+        minPort = entry->natInfo.minPort;

>+        maxPort = entry->natInfo.maxPort;

>+    }

>+

>+    uint32_t addrDelta = 0;

>+    uint32_t addrIndex;

>+    struct ct_addr ctAddr, maxCtAddr;

>+    memset(&ctAddr, 0, sizeof ctAddr);

>+    memset(&maxCtAddr, 0, sizeof maxCtAddr);

>+    maxCtAddr = entry->natInfo.maxAddr;

>+

>+    if (entry->key.dl_type == htons(ETH_TYPE_IPV4)) {

>+        addrDelta = ntohl(entry->natInfo.maxAddr.ipv4_aligned) -

>+                    ntohl(entry->natInfo.minAddr.ipv4_aligned);

>+        addrIndex = hash % (addrDelta + 1);

>+        ctAddr.ipv4_aligned = htonl(

>+            ntohl(entry->natInfo.minAddr.ipv4_aligned) + addrIndex);

>+    } else {

>+        // XXX: IPv6 not supported

>+        return FALSE;

>+    }

>+

>+    uint16_t port = firstPort;

>+    BOOLEAN allPortsTried = FALSE;

>+    BOOLEAN originalPortsTried = FALSE;

>+    struct ct_addr firstAddr = ctAddr;

>+    for (;;) {

>+        if (entry->natInfo.natAction & NAT_ACTION_SRC) {

>+            entry->rev_key.dst.addr = ctAddr;

>+            entry->rev_key.dst.port = htons(port);

>+        } else {

>+            entry->rev_key.src.addr = ctAddr;

>+            entry->rev_key.src.port = htons(port);

>+        }

>+

>+        OVS_NAT_ENTRY *natEntry = OvsNatLookup(&entry->rev_key, TRUE);

>+

>+        if (!natEntry) {

>+            natEntry = OvsAllocateMemoryWithTag(sizeof(*natEntry),

>+                                                OVS_CT_POOL_TAG);

>+            memcpy(&natEntry->key, &entry->key,

>+                   sizeof natEntry->key);

>+            memcpy(&natEntry->value, &entry->rev_key,

>+                   sizeof natEntry->value);

>+            natEntry->ctEntry = entry;

>+            OvsNatAddEntry(natEntry);

>+            return TRUE;

>+        } else if (!allPortsTried) {

>+            if (minPort == maxPort) {

>+                allPortsTried = TRUE;

>+            } else if (port == maxPort) {

>+                port = minPort;

>+            } else {

>+                port++;

>+            }

>+            if (port == firstPort) {

>+                allPortsTried = TRUE;

>+            }

>+        } else {

>+            if (memcmp(&ctAddr, &maxCtAddr, sizeof ctAddr)) {

>+                if (entry->key.dl_type == htons(ETH_TYPE_IPV4)) {

>+                    ctAddr.ipv4_aligned = htonl(

>+                        ntohl(ctAddr.ipv4_aligned) + 1);

>+                } else {

>+                    // XXX: IPv6 not supported

>+                    return FALSE;

>+                }

>+            } else {

>+                ctAddr = entry->natInfo.minAddr;

>+            }

>+            if (!memcmp(&ctAddr, &firstAddr, sizeof ctAddr)) {

>+                if (!originalPortsTried) {

>+                    originalPortsTried = TRUE;

>+                    ctAddr = entry->natInfo.minAddr;

>+                    minPort = MIN_NAT_EPHEMERAL_PORT;

>+                    maxPort = MAX_NAT_EPHEMERAL_PORT;

>+                } else {

>+                    break;

>+                }

>+            }

>+            firstPort = minPort;

>+            port = firstPort;

>+            allPortsTried = FALSE;

>+        }

>+    }

>+    return FALSE;

>+}

>+

>+/*

>+ *----------------------------------------------------------------------------

>+ * OvsNatLookup

>+ *     Look up a NAT entry with the given key in the NAT table.

>+ *     If reverse is TRUE, look up a NAT entry with the given value instead.

>+ *----------------------------------------------------------------------------

>+ */

>+POVS_NAT_ENTRY

>+OvsNatLookup(const OVS_CT_KEY *ctKey, BOOLEAN reverse)

>+{

>+    PLIST_ENTRY link;

>+    POVS_NAT_ENTRY entry;

>+

>+    LIST_FORALL(OvsNatGetBucket(ctKey, reverse), link) {

>+        if (reverse) {

>+            entry = CONTAINING_RECORD(link, OVS_NAT_ENTRY, reverseLink);

>+

>+            if (OvsNatKeyAreSame(ctKey, &entry->value)) {

>+                return entry;

>+            }

>+        } else {

>+            entry = CONTAINING_RECORD(link, OVS_NAT_ENTRY, link);

>+

>+            if (OvsNatKeyAreSame(ctKey, &entry->key)) {

>+                return entry;

>+            }

>+        }

>+    }

>+    return NULL;

>+}

>+

>+/*

>+ *----------------------------------------------------------------------------

>+ * OvsNatDeleteEntry

>+ *     Delete a NAT entry.

>+ *----------------------------------------------------------------------------

>+ */

>+VOID

>+OvsNatDeleteEntry(POVS_NAT_ENTRY entry)

>+{

>+    if (entry == NULL) {

>+        return;

>+    }

>+    RemoveEntryList(&entry->link);

>+    RemoveEntryList(&entry->reverseLink);

>+    OvsFreeMemoryWithTag(entry, OVS_CT_POOL_TAG);

>+}

>+

>+/*

>+ *----------------------------------------------------------------------------

>+ * OvsNatDeleteKey

>+ *     Delete a NAT entry with the given key.

>+ *----------------------------------------------------------------------------

>+ */

>+VOID

>+OvsNatDeleteKey(const OVS_CT_KEY *key)

>+{

>+    OvsNatDeleteEntry(OvsNatLookup(key, FALSE));

>+}

>diff --git a/datapath-windows/ovsext/Conntrack-nat.h b/datapath-windows/ovsext/Conntrack-nat.h

>new file mode 100644

>index 0000000..99f2b67

>--- /dev/null

>+++ b/datapath-windows/ovsext/Conntrack-nat.h

>@@ -0,0 +1,39 @@

>+#ifndef _CONNTRACK_NAT_H

>+#define _CONNTRACK_NAT_H

>+

>+#include "precomp.h"

>+#include "Flow.h"

>+#include "Debug.h"

>+#include <stddef.h>

>+#include "Conntrack.h"

>+

>+#define NAT_HASH_TABLE_SIZE ((UINT32)1 << 10)

>+#define NAT_HASH_TABLE_MASK (NAT_HASH_TABLE_SIZE - 1)

>+

>+typedef struct OVS_NAT_ENTRY {

>+    LIST_ENTRY link;

>+    LIST_ENTRY reverseLink;

>+    OVS_CT_KEY key;

>+    OVS_CT_KEY value;

>+    POVS_CT_ENTRY  ctEntry;

>+} OVS_NAT_ENTRY, *POVS_NAT_ENTRY;

>+

>+__inline static BOOLEAN OvsIsForwardNat(UINT16 natAction) {

>+    return !!(natAction & (NAT_ACTION_SRC | NAT_ACTION_DST));

>+}

>+

>+NTSTATUS OvsNatInit(POVS_SWITCH_CONTEXT);

>+VOID OvsNatFlush(UINT16 zone);

>+

>+VOID OvsNatAddEntry(OVS_NAT_ENTRY* entry);

>+

>+VOID OvsNatDeleteEntry(POVS_NAT_ENTRY entry);

>+VOID OvsNatDeleteKey(const OVS_CT_KEY *key);

>+VOID OvsNatCleanup();

>+

>+POVS_NAT_ENTRY OvsNatLookup(const OVS_CT_KEY *ctKey, BOOLEAN reverse);

>+BOOLEAN OvsNatCtEntry(OVS_CT_ENTRY *ctEntry);

>+VOID OvsNatPacket(OvsForwardingContext *ovsFwdCtx, const OVS_CT_ENTRY *entry,

>+                  UINT16 natAction, OvsFlowKey *key, BOOLEAN reverse);

>+

>+#endif

>\ No newline at end of file

>-- 

>2.10.2.windows.1

>
Yin Lin March 24, 2017, 5:58 p.m. UTC | #3
Hi Sai,

You are right. Locking is a little bit trickier here than in Conntrack.c
because we cannot grab the lock during the entire Conntrack transaction. I
will have to address thread safety in another patch.

Best regards,
Yin Lin

On Thu, Mar 23, 2017 at 11:58 PM, Sairam Venugopal <vsairam@vmware.com>
wrote:

> Hi Yin,
>
> Thanks for sending over the patch. While it looks good for the most part,
> there are certain indentations that need fixing. I will send those out
> separately.
>
> I see that you have allocated ovsNatLock and have added stubs for
> flush and cleanup, similar to Conntrack.c.
>
> However, I don’t see you taking out the lock for accessing the NAT tables.
> How are you verifying thread-safety here?
>
> Thanks,
> Sairam
>
>
>
>
> On 3/22/17, 3:11 PM, "ovs-dev-bounces@openvswitch.org on behalf of Yin
> Lin" <ovs-dev-bounces@openvswitch.org on behalf of linyi@vmware.com>
> wrote:
>
> >Signed-off-by: Yin Lin <linyi@vmware.com>
> >
> >Issue: #
> >Change-Id: I6f37360c36525548b343f0016304015fec8aba7d
> >---
> > datapath-windows/automake.mk            |   2 +
> > datapath-windows/ovsext/Conntrack-nat.c | 437
> ++++++++++++++++++++++++++++++++
> > datapath-windows/ovsext/Conntrack-nat.h |  39 +++
> > 3 files changed, 478 insertions(+)
> > create mode 100644 datapath-windows/ovsext/Conntrack-nat.c
> > create mode 100644 datapath-windows/ovsext/Conntrack-nat.h
> >
> >diff --git a/datapath-windows/automake.mk b/datapath-windows/automake.mk
> >index 53983ae..296e785 100644
> >--- a/datapath-windows/automake.mk
> >+++ b/datapath-windows/automake.mk
> >@@ -16,7 +16,9 @@ EXTRA_DIST += \
> >       datapath-windows/ovsext/Conntrack-icmp.c \
> >       datapath-windows/ovsext/Conntrack-other.c \
> >       datapath-windows/ovsext/Conntrack-related.c \
> >+    datapath-windows/ovsext/Conntrack-nat.c \
> >       datapath-windows/ovsext/Conntrack-tcp.c \
> >+    datapath-windows/ovsext/Conntrack-nat.h \
> >       datapath-windows/ovsext/Conntrack.c \
> >       datapath-windows/ovsext/Conntrack.h \
> >       datapath-windows/ovsext/Datapath.c \
> >diff --git a/datapath-windows/ovsext/Conntrack-nat.c
> b/datapath-windows/ovsext/Conntrack-nat.c
> >new file mode 100644
> >index 0000000..4930694
> >--- /dev/null
> >+++ b/datapath-windows/ovsext/Conntrack-nat.c
> >@@ -0,0 +1,437 @@
> >+#include "Conntrack-nat.h"
> >+#include "Jhash.h"
> >+
> >+PLIST_ENTRY ovsNatTable = NULL;
> >+PLIST_ENTRY ovsUnNatTable = NULL;
> >+static PNDIS_RW_LOCK_EX ovsNatLock;
> >+
> >+/*
> >+ *-----------------------------------------------------------
> ----------------
> >+ * OvsHashNatKey
> >+ *     Hash NAT related fields in a Conntrack key.
> >+ *-----------------------------------------------------------
> ----------------
> >+ */
> >+static __inline UINT32
> >+OvsHashNatKey(const OVS_CT_KEY *key)
> >+{
> >+    UINT32 hash = 0;
> >+#define HASH_ADD(field) \
> >+    hash = OvsJhashBytes(&key->field, sizeof(key->field), hash)
> >+
> >+    HASH_ADD(src.addr.ipv4_aligned);
> >+    HASH_ADD(dst.addr.ipv4_aligned);
> >+    HASH_ADD(src.port);
> >+    HASH_ADD(dst.port);
> >+    HASH_ADD(zone);
> >+#undef HASH_ADD
> >+    return hash;
> >+}
> >+
> >+/*
> >+ *-----------------------------------------------------------
> ----------------
> >+ * OvsNatKeyAreSame
> >+ *     Compare NAT related fields in a Conntrack key.
> >+ *-----------------------------------------------------------
> ----------------
> >+ */
> >+static __inline BOOLEAN
> >+OvsNatKeyAreSame(const OVS_CT_KEY *key1, const OVS_CT_KEY *key2)
> >+{
> >+    // XXX: Compare IPv6 key as well
> >+#define FIELD_COMPARE(field) \
> >+    if (key1->field != key2->field) return FALSE
> >+
> >+    FIELD_COMPARE(src.addr.ipv4_aligned);
> >+    FIELD_COMPARE(dst.addr.ipv4_aligned);
> >+    FIELD_COMPARE(src.port);
> >+    FIELD_COMPARE(dst.port);
> >+    FIELD_COMPARE(zone);
> >+    return TRUE;
> >+#undef FIELD_COMPARE
> >+}
> >+
> >+/*
> >+ *-----------------------------------------------------------
> ----------------
> >+ * OvsNatGetBucket
> >+ *     Returns the row of NAT table that has the same hash as the given
> NAT
> >+ *     hash key. If isReverse is TRUE, returns the row of reverse NAT
> table
> >+ *     instead.
> >+ *-----------------------------------------------------------
> ----------------
> >+ */
> >+static __inline PLIST_ENTRY
> >+OvsNatGetBucket(const OVS_CT_KEY *key, BOOLEAN isReverse)
> >+{
> >+    uint32_t hash = OvsHashNatKey(key);
> >+    if (isReverse) {
> >+        return &ovsUnNatTable[hash & NAT_HASH_TABLE_MASK];
> >+    } else {
> >+        return &ovsNatTable[hash & NAT_HASH_TABLE_MASK];
> >+    }
> >+}
> >+
> >+/*
> >+ *-----------------------------------------------------------
> ----------------
> >+ * OvsNatInit
> >+ *     Initialize NAT related resources.
> >+ *-----------------------------------------------------------
> ----------------
> >+ */
> >+NTSTATUS OvsNatInit(POVS_SWITCH_CONTEXT context)
> >+{
> >+    ASSERT(ovsNatTable == NULL);
> >+
> >+    /* Init the sync-lock */
> >+    ovsNatLock = NdisAllocateRWLock(context->NdisFilterHandle);
> >+    if (ovsNatLock == NULL) {
> >+        goto failNoMem;
> >+    }
> >+
> >+    /* Init the Hash Buffer */
> >+    ovsNatTable = OvsAllocateMemoryWithTag(
> >+        sizeof(LIST_ENTRY) * NAT_HASH_TABLE_SIZE,
> >+        OVS_CT_POOL_TAG);
> >+    if (ovsNatTable == NULL) {
> >+        goto freeNatLock;
> >+    }
> >+
> >+    ovsUnNatTable = OvsAllocateMemoryWithTag(
> >+        sizeof(LIST_ENTRY) * NAT_HASH_TABLE_SIZE,
> >+        OVS_CT_POOL_TAG);
> >+    if (ovsUnNatTable == NULL) {
> >+        goto freeNatTable;
> >+    }
> >+
> >+    for (int i = 0; i < NAT_HASH_TABLE_SIZE; i++) {
> >+        InitializeListHead(&ovsNatTable[i]);
> >+        InitializeListHead(&ovsUnNatTable[i]);
> >+    }
> >+    return STATUS_SUCCESS;
> >+
> >+freeNatTable:
> >+    OvsFreeMemoryWithTag(ovsNatTable, OVS_CT_POOL_TAG);
> >+freeNatLock:
> >+    NdisFreeRWLock(ovsNatLock);
> >+failNoMem:
> >+    return STATUS_INSUFFICIENT_RESOURCES;
> >+}
> >+
> >+/*
> >+ *-----------------------------------------------------------
> -----------------
> >+ * OvsNatFlush
> >+ *     Flushes out all NAT entries that match the given zone.
> >+ *-----------------------------------------------------------
> -----------------
> >+ */
> >+VOID OvsNatFlush(UINT16 zone)
> >+{
> >+    LOCK_STATE_EX lockState;
> >+    PLIST_ENTRY link, next;
> >+    NdisAcquireRWLockWrite(ovsNatLock, &lockState, 0);
> >+    for (int i = 0; i < NAT_HASH_TABLE_SIZE; i++) {
> >+        LIST_FORALL_SAFE(&ovsNatTable[i], link, next) {
> >+            POVS_NAT_ENTRY entry =
> >+                CONTAINING_RECORD(link, OVS_NAT_ENTRY, link);
> >+            /* zone is a non-zero value */
> >+            if (!zone || zone == entry->key.zone) {
> >+                OvsNatDeleteEntry(entry);
> >+            }
> >+        }
> >+    }
> >+    NdisReleaseRWLock(ovsNatLock, &lockState);
> >+}
> >+
> >+/*
> >+ *-----------------------------------------------------------
> -----------------
> >+ * OvsNatCleanup
> >+ *     Releases all NAT related resources.
> >+ *-----------------------------------------------------------
> -----------------
> >+ */
> >+VOID OvsNatCleanup()
> >+{
> >+    if (ovsNatTable == NULL) return;
> >+    OvsFreeMemoryWithTag(ovsNatTable, OVS_CT_POOL_TAG);
> >+    OvsFreeMemoryWithTag(ovsUnNatTable, OVS_CT_POOL_TAG);
> >+    NdisFreeRWLock(ovsNatLock);
> >+    ovsNatTable = NULL;
> >+    ovsUnNatTable = NULL;
> >+    ovsNatLock = NULL;
> >+}
> >+
> >+/*
> >+ *-----------------------------------------------------------
> -----------------
> >+ * OvsNatPacket
> >+ *     Performs NAT operation on the packet by replacing the
> source/destination
> >+ *     address/port based on natAction. If reverse is TRUE, perform unNAT
> >+ *     instead.
> >+ *-----------------------------------------------------------
> -----------------
> >+ */
> >+VOID
> >+OvsNatPacket(OvsForwardingContext *ovsFwdCtx,
> >+             const OVS_CT_ENTRY *entry,
> >+             UINT16 natAction,
> >+             OvsFlowKey *key,
> >+             BOOLEAN reverse)
> >+{
> >+    UINT32 natFlag;
> >+    const struct ct_endpoint* endpoint;
> >+    /* When it is NAT, only entry->rev_key contains NATTED address;
> >+       When it is unNAT, only entry->key contains the UNNATTED address;*/
> >+    const OVS_CT_KEY *ctKey = reverse ? &entry->key : &entry->rev_key;
> >+    BOOLEAN isSrcNat;
> >+
> >+    if (!(natAction & (NAT_ACTION_SRC | NAT_ACTION_DST))) {
> >+        return;
> >+    }
> >+    isSrcNat = (((natAction & NAT_ACTION_SRC) && !reverse) ||
> >+                ((natAction & NAT_ACTION_DST) && reverse));
> >+
> >+    if (isSrcNat) {
> >+        /* Flag is set to SNAT for SNAT case and the reverse DNAT case */
> >+        natFlag = OVS_CS_F_SRC_NAT;
> >+        /* Note that ctKey is the key in the other direction, so
> >+           endpoint has to be reverted, i.e. ctKey->dst for SNAT
> >+           and ctKey->src for DNAT */
> >+        endpoint = &ctKey->dst;
> >+    } else {
> >+        natFlag = OVS_CS_F_DST_NAT;
> >+        endpoint = &ctKey->src;
> >+    }
> >+    key->ct.state |= natFlag;
> >+    if (ctKey->dl_type == htons(ETH_TYPE_IPV4)) {
> >+        OvsUpdateAddressAndPort(ovsFwdCtx,
> >+                                endpoint->addr.ipv4_aligned,
> >+                                endpoint->port, isSrcNat);
> >+        if (isSrcNat) {
> >+            key->ipKey.nwSrc = endpoint->addr.ipv4_aligned;
> >+        } else {
> >+            key->ipKey.nwDst = endpoint->addr.ipv4_aligned;
> >+        }
> >+    } else if (ctKey->dl_type == htons(ETH_TYPE_IPV6)){
> >+        // XXX: IPv6 packet not supported yet.
> >+        return;
> >+    }
> >+    if (natAction & (NAT_ACTION_SRC_PORT | NAT_ACTION_DST_PORT)) {
> >+        if (isSrcNat) {
> >+            if (key->ipKey.l4.tpSrc != 0) {
> >+                key->ipKey.l4.tpSrc = endpoint->port;
> >+            }
> >+        } else {
> >+            if (key->ipKey.l4.tpDst != 0) {
> >+                key->ipKey.l4.tpDst = endpoint->port;
> >+            }
> >+        }
> >+    }
> >+}
> >+
> >+
> >+/*
> >+ *-----------------------------------------------------------
> -----------------
> >+ * OvsNatHashRange
> >+ *     Compute hash for a range of addresses specified in natInfo.
> >+ *-----------------------------------------------------------
> -----------------
> >+ */
> >+static UINT32 OvsNatHashRange(const OVS_CT_ENTRY *entry, UINT32 basis)
> >+{
> >+    UINT32 hash = basis;
> >+#define HASH_ADD(field) \
> >+    hash = OvsJhashBytes(&field, sizeof(field), hash)
> >+
> >+    HASH_ADD(entry->natInfo.minAddr);
> >+    HASH_ADD(entry->natInfo.maxAddr);
> >+    HASH_ADD(entry->key.dl_type);
> >+    HASH_ADD(entry->key.nw_proto);
> >+    HASH_ADD(entry->key.zone);
> >+#undef HASH_ADD
> >+    return hash;
> >+}
> >+
> >+/*
> >+ *-----------------------------------------------------------
> -----------------
> >+ * OvsNatAddEntry
> >+ *     Add an entry to the NAT table. Also updates the reverse NAT lookup
> >+ *     table.
> >+ *-----------------------------------------------------------
> -----------------
> >+ */
> >+VOID
> >+OvsNatAddEntry(OVS_NAT_ENTRY* entry)
> >+{
> >+    InsertHeadList(OvsNatGetBucket(&entry->key, FALSE),
> >+                   &entry->link);
> >+    InsertHeadList(OvsNatGetBucket(&entry->value, TRUE),
> >+                   &entry->reverseLink);
> >+}
> >+
> >+/*
> >+ *-----------------------------------------------------------
> -----------------
> >+ * OvsNatCtEntry
> >+ *     Update a Conntrack entry with NAT information. Translated
> address and
> >+ *     port will be generated and written back to the conntrack entry as a
> >+ *     result.
> >+ *-----------------------------------------------------------
> -----------------
> >+ */
> >+BOOLEAN
> >+OvsNatCtEntry(OVS_CT_ENTRY *entry)
> >+{
> >+    const uint16_t MIN_NAT_EPHEMERAL_PORT = 1024;
> >+    const uint16_t MAX_NAT_EPHEMERAL_PORT = 65535;
> >+
> >+    uint16_t minPort;
> >+    uint16_t maxPort;
> >+    uint16_t firstPort;
> >+
> >+    uint32_t hash = OvsNatHashRange(entry, 0);
> >+
> >+    if ((entry->natInfo.natAction & NAT_ACTION_SRC) &&
> >+        (!(entry->natInfo.natAction & NAT_ACTION_SRC_PORT))) {
> >+        firstPort = minPort = maxPort = ntohs(entry->key.src.port);
> >+    } else if ((entry->natInfo.natAction & NAT_ACTION_DST) &&
> >+               (!(entry->natInfo.natAction & NAT_ACTION_DST_PORT))) {
> >+        firstPort = minPort = maxPort = ntohs(entry->key.dst.port);
> >+    } else {
> >+        uint16_t portDelta = entry->natInfo.maxPort -
> entry->natInfo.minPort;
> >+        uint16_t portIndex = (uint16_t) hash % (portDelta + 1);
> >+        firstPort = entry->natInfo.minPort + portIndex;
> >+        minPort = entry->natInfo.minPort;
> >+        maxPort = entry->natInfo.maxPort;
> >+    }
> >+
> >+    uint32_t addrDelta = 0;
> >+    uint32_t addrIndex;
> >+    struct ct_addr ctAddr, maxCtAddr;
> >+    memset(&ctAddr, 0, sizeof ctAddr);
> >+    memset(&maxCtAddr, 0, sizeof maxCtAddr);
> >+    maxCtAddr = entry->natInfo.maxAddr;
> >+
> >+    if (entry->key.dl_type == htons(ETH_TYPE_IPV4)) {
> >+        addrDelta = ntohl(entry->natInfo.maxAddr.ipv4_aligned) -
> >+                    ntohl(entry->natInfo.minAddr.ipv4_aligned);
> >+        addrIndex = hash % (addrDelta + 1);
> >+        ctAddr.ipv4_aligned = htonl(
> >+            ntohl(entry->natInfo.minAddr.ipv4_aligned) + addrIndex);
> >+    } else {
> >+        // XXX: IPv6 not supported
> >+        return FALSE;
> >+    }
> >+
> >+    uint16_t port = firstPort;
> >+    BOOLEAN allPortsTried = FALSE;
> >+    BOOLEAN originalPortsTried = FALSE;
> >+    struct ct_addr firstAddr = ctAddr;
> >+    for (;;) {
> >+        if (entry->natInfo.natAction & NAT_ACTION_SRC) {
> >+            entry->rev_key.dst.addr = ctAddr;
> >+            entry->rev_key.dst.port = htons(port);
> >+        } else {
> >+            entry->rev_key.src.addr = ctAddr;
> >+            entry->rev_key.src.port = htons(port);
> >+        }
> >+
> >+        OVS_NAT_ENTRY *natEntry = OvsNatLookup(&entry->rev_key, TRUE);
> >+
> >+        if (!natEntry) {
> >+            natEntry = OvsAllocateMemoryWithTag(sizeof(*natEntry),
> >+                                                OVS_CT_POOL_TAG);
> >+            memcpy(&natEntry->key, &entry->key,
> >+                   sizeof natEntry->key);
> >+            memcpy(&natEntry->value, &entry->rev_key,
> >+                   sizeof natEntry->value);
> >+            natEntry->ctEntry = entry;
> >+            OvsNatAddEntry(natEntry);
> >+            return TRUE;
> >+        } else if (!allPortsTried) {
> >+            if (minPort == maxPort) {
> >+                allPortsTried = TRUE;
> >+            } else if (port == maxPort) {
> >+                port = minPort;
> >+            } else {
> >+                port++;
> >+            }
> >+            if (port == firstPort) {
> >+                allPortsTried = TRUE;
> >+            }
> >+        } else {
> >+            if (memcmp(&ctAddr, &maxCtAddr, sizeof ctAddr)) {
> >+                if (entry->key.dl_type == htons(ETH_TYPE_IPV4)) {
> >+                    ctAddr.ipv4_aligned = htonl(
> >+                        ntohl(ctAddr.ipv4_aligned) + 1);
> >+                } else {
> >+                    // XXX: IPv6 not supported
> >+                    return FALSE;
> >+                }
> >+            } else {
> >+                ctAddr = entry->natInfo.minAddr;
> >+            }
> >+            if (!memcmp(&ctAddr, &firstAddr, sizeof ctAddr)) {
> >+                if (!originalPortsTried) {
> >+                    originalPortsTried = TRUE;
> >+                    ctAddr = entry->natInfo.minAddr;
> >+                    minPort = MIN_NAT_EPHEMERAL_PORT;
> >+                    maxPort = MAX_NAT_EPHEMERAL_PORT;
> >+                } else {
> >+                    break;
> >+                }
> >+            }
> >+            firstPort = minPort;
> >+            port = firstPort;
> >+            allPortsTried = FALSE;
> >+        }
> >+    }
> >+    return FALSE;
> >+}
> >+
> >+/*
> >+ *-----------------------------------------------------------
> -----------------
> >+ * OvsNatLookup
> >+ *     Look up a NAT entry with the given key in the NAT table.
> >+ *     If reverse is TRUE, look up a NAT entry with the given value
> instead.
> >+ *-----------------------------------------------------------
> -----------------
> >+ */
> >+POVS_NAT_ENTRY
> >+OvsNatLookup(const OVS_CT_KEY *ctKey, BOOLEAN reverse)
> >+{
> >+    PLIST_ENTRY link;
> >+    POVS_NAT_ENTRY entry;
> >+
> >+    LIST_FORALL(OvsNatGetBucket(ctKey, reverse), link) {
> >+        if (reverse) {
> >+            entry = CONTAINING_RECORD(link, OVS_NAT_ENTRY, reverseLink);
> >+
> >+            if (OvsNatKeyAreSame(ctKey, &entry->value)) {
> >+                return entry;
> >+            }
> >+        } else {
> >+            entry = CONTAINING_RECORD(link, OVS_NAT_ENTRY, link);
> >+
> >+            if (OvsNatKeyAreSame(ctKey, &entry->key)) {
> >+                return entry;
> >+            }
> >+        }
> >+    }
> >+    return NULL;
> >+}
> >+
> >+/*
> >+ *-----------------------------------------------------------
> -----------------
> >+ * OvsNatDeleteEntry
> >+ *     Delete a NAT entry.
> >+ *-----------------------------------------------------------
> -----------------
> >+ */
> >+VOID
> >+OvsNatDeleteEntry(POVS_NAT_ENTRY entry)
> >+{
> >+    if (entry == NULL) {
> >+        return;
> >+    }
> >+    RemoveEntryList(&entry->link);
> >+    RemoveEntryList(&entry->reverseLink);
> >+    OvsFreeMemoryWithTag(entry, OVS_CT_POOL_TAG);
> >+}
> >+
> >+/*
> >+ *-----------------------------------------------------------
> -----------------
> >+ * OvsNatDeleteKey
> >+ *     Delete a NAT entry with the given key.
> >+ *-----------------------------------------------------------
> -----------------
> >+ */
> >+VOID
> >+OvsNatDeleteKey(const OVS_CT_KEY *key)
> >+{
> >+    OvsNatDeleteEntry(OvsNatLookup(key, FALSE));
> >+}
> >diff --git a/datapath-windows/ovsext/Conntrack-nat.h
> b/datapath-windows/ovsext/Conntrack-nat.h
> >new file mode 100644
> >index 0000000..99f2b67
> >--- /dev/null
> >+++ b/datapath-windows/ovsext/Conntrack-nat.h
> >@@ -0,0 +1,39 @@
> >+#ifndef _CONNTRACK_NAT_H
> >+#define _CONNTRACK_NAT_H
> >+
> >+#include "precomp.h"
> >+#include "Flow.h"
> >+#include "Debug.h"
> >+#include <stddef.h>
> >+#include "Conntrack.h"
> >+
> >+#define NAT_HASH_TABLE_SIZE ((UINT32)1 << 10)
> >+#define NAT_HASH_TABLE_MASK (NAT_HASH_TABLE_SIZE - 1)
> >+
> >+typedef struct OVS_NAT_ENTRY {
> >+    LIST_ENTRY link;
> >+    LIST_ENTRY reverseLink;
> >+    OVS_CT_KEY key;
> >+    OVS_CT_KEY value;
> >+    POVS_CT_ENTRY  ctEntry;
> >+} OVS_NAT_ENTRY, *POVS_NAT_ENTRY;
> >+
> >+__inline static BOOLEAN OvsIsForwardNat(UINT16 natAction) {
> >+    return !!(natAction & (NAT_ACTION_SRC | NAT_ACTION_DST));
> >+}
> >+
> >+NTSTATUS OvsNatInit(POVS_SWITCH_CONTEXT);
> >+VOID OvsNatFlush(UINT16 zone);
> >+
> >+VOID OvsNatAddEntry(OVS_NAT_ENTRY* entry);
> >+
> >+VOID OvsNatDeleteEntry(POVS_NAT_ENTRY entry);
> >+VOID OvsNatDeleteKey(const OVS_CT_KEY *key);
> >+VOID OvsNatCleanup();
> >+
> >+POVS_NAT_ENTRY OvsNatLookup(const OVS_CT_KEY *ctKey, BOOLEAN reverse);
> >+BOOLEAN OvsNatCtEntry(OVS_CT_ENTRY *ctEntry);
> >+VOID OvsNatPacket(OvsForwardingContext *ovsFwdCtx, const OVS_CT_ENTRY
> *entry,
> >+                  UINT16 natAction, OvsFlowKey *key, BOOLEAN reverse);
> >+
> >+#endif
> >\ No newline at end of file
> >--
> >2.10.2.windows.1
> >
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
diff mbox

Patch

diff --git a/datapath-windows/automake.mk b/datapath-windows/automake.mk
index 53983ae..296e785 100644
--- a/datapath-windows/automake.mk
+++ b/datapath-windows/automake.mk
@@ -16,7 +16,9 @@  EXTRA_DIST += \
 	datapath-windows/ovsext/Conntrack-icmp.c \
 	datapath-windows/ovsext/Conntrack-other.c \
 	datapath-windows/ovsext/Conntrack-related.c \
+    datapath-windows/ovsext/Conntrack-nat.c \
 	datapath-windows/ovsext/Conntrack-tcp.c \
+    datapath-windows/ovsext/Conntrack-nat.h \
 	datapath-windows/ovsext/Conntrack.c \
 	datapath-windows/ovsext/Conntrack.h \
 	datapath-windows/ovsext/Datapath.c \
diff --git a/datapath-windows/ovsext/Conntrack-nat.c b/datapath-windows/ovsext/Conntrack-nat.c
new file mode 100644
index 0000000..4930694
--- /dev/null
+++ b/datapath-windows/ovsext/Conntrack-nat.c
@@ -0,0 +1,437 @@ 
+#include "Conntrack-nat.h"
+#include "Jhash.h"
+
+PLIST_ENTRY ovsNatTable = NULL;
+PLIST_ENTRY ovsUnNatTable = NULL;
+static PNDIS_RW_LOCK_EX ovsNatLock;
+
+/*
+ *---------------------------------------------------------------------------
+ * OvsHashNatKey
+ *     Hash NAT related fields in a Conntrack key.
+ *---------------------------------------------------------------------------
+ */
+static __inline UINT32
+OvsHashNatKey(const OVS_CT_KEY *key)
+{
+    UINT32 hash = 0;
+#define HASH_ADD(field) \
+    hash = OvsJhashBytes(&key->field, sizeof(key->field), hash)
+
+    HASH_ADD(src.addr.ipv4_aligned);
+    HASH_ADD(dst.addr.ipv4_aligned);
+    HASH_ADD(src.port);
+    HASH_ADD(dst.port);
+    HASH_ADD(zone);
+#undef HASH_ADD
+    return hash;
+}
+
+/*
+ *---------------------------------------------------------------------------
+ * OvsNatKeyAreSame
+ *     Compare NAT related fields in a Conntrack key.
+ *---------------------------------------------------------------------------
+ */
+static __inline BOOLEAN
+OvsNatKeyAreSame(const OVS_CT_KEY *key1, const OVS_CT_KEY *key2)
+{
+    // XXX: Compare IPv6 key as well
+#define FIELD_COMPARE(field) \
+    if (key1->field != key2->field) return FALSE
+
+    FIELD_COMPARE(src.addr.ipv4_aligned);
+    FIELD_COMPARE(dst.addr.ipv4_aligned);
+    FIELD_COMPARE(src.port);
+    FIELD_COMPARE(dst.port);
+    FIELD_COMPARE(zone);
+    return TRUE;
+#undef FIELD_COMPARE
+}
+
+/*
+ *---------------------------------------------------------------------------
+ * OvsNatGetBucket
+ *     Returns the row of NAT table that has the same hash as the given NAT
+ *     hash key. If isReverse is TRUE, returns the row of reverse NAT table
+ *     instead.
+ *---------------------------------------------------------------------------
+ */
+static __inline PLIST_ENTRY
+OvsNatGetBucket(const OVS_CT_KEY *key, BOOLEAN isReverse)
+{
+    uint32_t hash = OvsHashNatKey(key);
+    if (isReverse) {
+        return &ovsUnNatTable[hash & NAT_HASH_TABLE_MASK];
+    } else {
+        return &ovsNatTable[hash & NAT_HASH_TABLE_MASK];
+    }
+}
+
+/*
+ *---------------------------------------------------------------------------
+ * OvsNatInit
+ *     Initialize NAT related resources.
+ *---------------------------------------------------------------------------
+ */
+NTSTATUS OvsNatInit(POVS_SWITCH_CONTEXT context)
+{
+    ASSERT(ovsNatTable == NULL);
+
+    /* Init the sync-lock */
+    ovsNatLock = NdisAllocateRWLock(context->NdisFilterHandle);
+    if (ovsNatLock == NULL) {
+        goto failNoMem;
+    }
+
+    /* Init the Hash Buffer */
+    ovsNatTable = OvsAllocateMemoryWithTag(
+        sizeof(LIST_ENTRY) * NAT_HASH_TABLE_SIZE,
+        OVS_CT_POOL_TAG);
+    if (ovsNatTable == NULL) {
+        goto freeNatLock;
+    }
+
+    ovsUnNatTable = OvsAllocateMemoryWithTag(
+        sizeof(LIST_ENTRY) * NAT_HASH_TABLE_SIZE,
+        OVS_CT_POOL_TAG);
+    if (ovsUnNatTable == NULL) {
+        goto freeNatTable;
+    }
+
+    for (int i = 0; i < NAT_HASH_TABLE_SIZE; i++) {
+        InitializeListHead(&ovsNatTable[i]);
+        InitializeListHead(&ovsUnNatTable[i]);
+    }
+    return STATUS_SUCCESS;
+
+freeNatTable:
+    OvsFreeMemoryWithTag(ovsNatTable, OVS_CT_POOL_TAG);
+freeNatLock:
+    NdisFreeRWLock(ovsNatLock);
+failNoMem:
+    return STATUS_INSUFFICIENT_RESOURCES;
+}
+
+/*
+ *----------------------------------------------------------------------------
+ * OvsNatFlush
+ *     Flushes out all NAT entries that match the given zone.
+ *----------------------------------------------------------------------------
+ */
+VOID OvsNatFlush(UINT16 zone)
+{
+    LOCK_STATE_EX lockState;
+    PLIST_ENTRY link, next;
+    NdisAcquireRWLockWrite(ovsNatLock, &lockState, 0);
+    for (int i = 0; i < NAT_HASH_TABLE_SIZE; i++) {
+        LIST_FORALL_SAFE(&ovsNatTable[i], link, next) {
+            POVS_NAT_ENTRY entry =
+                CONTAINING_RECORD(link, OVS_NAT_ENTRY, link);
+            /* A zone of 0 matches entries in every zone */
+            if (!zone || zone == entry->key.zone) {
+                OvsNatDeleteEntry(entry);
+            }
+        }
+    }
+    NdisReleaseRWLock(ovsNatLock, &lockState);
+}
+
+/*
+ *----------------------------------------------------------------------------
+ * OvsNatCleanup
+ *     Releases all NAT related resources.
+ *----------------------------------------------------------------------------
+ */
+VOID OvsNatCleanup()
+{
+    if (ovsNatTable == NULL) return;
+    OvsFreeMemoryWithTag(ovsNatTable, OVS_CT_POOL_TAG);
+    OvsFreeMemoryWithTag(ovsUnNatTable, OVS_CT_POOL_TAG);
+    NdisFreeRWLock(ovsNatLock);
+    ovsNatTable = NULL;
+    ovsUnNatTable = NULL;
+    ovsNatLock = NULL;
+}
+
+/*
+ *----------------------------------------------------------------------------
+ * OvsNatPacket
+ *     Performs NAT operation on the packet by replacing the source/destination
+ *     address/port based on natAction. If reverse is TRUE, perform unNAT
+ *     instead.
+ *----------------------------------------------------------------------------
+ */
+VOID
+OvsNatPacket(OvsForwardingContext *ovsFwdCtx,
+             const OVS_CT_ENTRY *entry,
+             UINT16 natAction,
+             OvsFlowKey *key,
+             BOOLEAN reverse)
+{
+    UINT32 natFlag;
+    const struct ct_endpoint* endpoint;
+    /* For NAT, only entry->rev_key contains the NATted address;
+       for unNAT, only entry->key contains the un-NATted address. */
+    const OVS_CT_KEY *ctKey = reverse ? &entry->key : &entry->rev_key;
+    BOOLEAN isSrcNat;
+
+    if (!(natAction & (NAT_ACTION_SRC | NAT_ACTION_DST))) {
+        return;
+    }
+    isSrcNat = (((natAction & NAT_ACTION_SRC) && !reverse) ||
+                ((natAction & NAT_ACTION_DST) && reverse));
+
+    if (isSrcNat) {
+        /* Flag is set to SNAT for SNAT case and the reverse DNAT case */
+        natFlag = OVS_CS_F_SRC_NAT;
+        /* Note that ctKey is the key in the other direction, so the
+           endpoint has to be reversed, i.e. ctKey->dst for SNAT
+           and ctKey->src for DNAT */
+        endpoint = &ctKey->dst;
+    } else {
+        natFlag = OVS_CS_F_DST_NAT;
+        endpoint = &ctKey->src;
+    }
+    key->ct.state |= natFlag;
+    if (ctKey->dl_type == htons(ETH_TYPE_IPV4)) {
+        OvsUpdateAddressAndPort(ovsFwdCtx,
+                                endpoint->addr.ipv4_aligned,
+                                endpoint->port, isSrcNat);
+        if (isSrcNat) {
+            key->ipKey.nwSrc = endpoint->addr.ipv4_aligned;
+        } else {
+            key->ipKey.nwDst = endpoint->addr.ipv4_aligned;
+        }
+    } else if (ctKey->dl_type == htons(ETH_TYPE_IPV6)){
+        // XXX: IPv6 packet not supported yet.
+        return;
+    }
+    if (natAction & (NAT_ACTION_SRC_PORT | NAT_ACTION_DST_PORT)) {
+        if (isSrcNat) {
+            if (key->ipKey.l4.tpSrc != 0) {
+                key->ipKey.l4.tpSrc = endpoint->port;
+            }
+        } else {
+            if (key->ipKey.l4.tpDst != 0) {
+                key->ipKey.l4.tpDst = endpoint->port;
+            }
+        }
+    }
+}
+
+
+/*
+ *----------------------------------------------------------------------------
+ * OvsNatHashRange
+ *     Compute hash for a range of addresses specified in natInfo.
+ *----------------------------------------------------------------------------
+ */
+static UINT32 OvsNatHashRange(const OVS_CT_ENTRY *entry, UINT32 basis)
+{
+    UINT32 hash = basis;
+#define HASH_ADD(field) \
+    hash = OvsJhashBytes(&field, sizeof(field), hash)
+
+    HASH_ADD(entry->natInfo.minAddr);
+    HASH_ADD(entry->natInfo.maxAddr);
+    HASH_ADD(entry->key.dl_type);
+    HASH_ADD(entry->key.nw_proto);
+    HASH_ADD(entry->key.zone);
+#undef HASH_ADD
+    return hash;
+}
+
+/*
+ *----------------------------------------------------------------------------
+ * OvsNatAddEntry
+ *     Add an entry to the NAT table. Also updates the reverse NAT lookup
+ *     table.
+ *----------------------------------------------------------------------------
+ */
+VOID
+OvsNatAddEntry(OVS_NAT_ENTRY* entry)
+{
+    InsertHeadList(OvsNatGetBucket(&entry->key, FALSE),
+                   &entry->link);
+    InsertHeadList(OvsNatGetBucket(&entry->value, TRUE),
+                   &entry->reverseLink);
+}
+
+/*
+ *----------------------------------------------------------------------------
+ * OvsNatCtEntry
+ *     Update a Conntrack entry with NAT information. The translated address
+ *     and port are generated and written back to the conntrack entry as a
+ *     result.
+ *----------------------------------------------------------------------------
+ */
+BOOLEAN
+OvsNatCtEntry(OVS_CT_ENTRY *entry)
+{
+    const uint16_t MIN_NAT_EPHEMERAL_PORT = 1024;
+    const uint16_t MAX_NAT_EPHEMERAL_PORT = 65535;
+
+    uint16_t minPort;
+    uint16_t maxPort;
+    uint16_t firstPort;
+
+    uint32_t hash = OvsNatHashRange(entry, 0);
+
+    if ((entry->natInfo.natAction & NAT_ACTION_SRC) &&
+        (!(entry->natInfo.natAction & NAT_ACTION_SRC_PORT))) {
+        firstPort = minPort = maxPort = ntohs(entry->key.src.port);
+    } else if ((entry->natInfo.natAction & NAT_ACTION_DST) &&
+               (!(entry->natInfo.natAction & NAT_ACTION_DST_PORT))) {
+        firstPort = minPort = maxPort = ntohs(entry->key.dst.port);
+    } else {
+        uint16_t portDelta = entry->natInfo.maxPort - entry->natInfo.minPort;
+        uint16_t portIndex = (uint16_t) hash % (portDelta + 1);
+        firstPort = entry->natInfo.minPort + portIndex;
+        minPort = entry->natInfo.minPort;
+        maxPort = entry->natInfo.maxPort;
+    }
+
+    uint32_t addrDelta = 0;
+    uint32_t addrIndex;
+    struct ct_addr ctAddr, maxCtAddr;
+    memset(&ctAddr, 0, sizeof ctAddr);
+    memset(&maxCtAddr, 0, sizeof maxCtAddr);
+    maxCtAddr = entry->natInfo.maxAddr;
+
+    if (entry->key.dl_type == htons(ETH_TYPE_IPV4)) {
+        addrDelta = ntohl(entry->natInfo.maxAddr.ipv4_aligned) -
+                    ntohl(entry->natInfo.minAddr.ipv4_aligned);
+        addrIndex = hash % (addrDelta + 1);
+        ctAddr.ipv4_aligned = htonl(
+            ntohl(entry->natInfo.minAddr.ipv4_aligned) + addrIndex);
+    } else {
+        // XXX: IPv6 not supported
+        return FALSE;
+    }
+
+    uint16_t port = firstPort;
+    BOOLEAN allPortsTried = FALSE;
+    BOOLEAN originalPortsTried = FALSE;
+    struct ct_addr firstAddr = ctAddr;
+    for (;;) {
+        if (entry->natInfo.natAction & NAT_ACTION_SRC) {
+            entry->rev_key.dst.addr = ctAddr;
+            entry->rev_key.dst.port = htons(port);
+        } else {
+            entry->rev_key.src.addr = ctAddr;
+            entry->rev_key.src.port = htons(port);
+        }
+
+        OVS_NAT_ENTRY *natEntry = OvsNatLookup(&entry->rev_key, TRUE);
+
+        if (!natEntry) {
+            natEntry = OvsAllocateMemoryWithTag(sizeof(*natEntry),
+                                                OVS_CT_POOL_TAG);
+            memcpy(&natEntry->key, &entry->key,
+                   sizeof natEntry->key);
+            memcpy(&natEntry->value, &entry->rev_key,
+                   sizeof natEntry->value);
+            natEntry->ctEntry = entry;
+            OvsNatAddEntry(natEntry);
+            return TRUE;
+        } else if (!allPortsTried) {
+            if (minPort == maxPort) {
+                allPortsTried = TRUE;
+            } else if (port == maxPort) {
+                port = minPort;
+            } else {
+                port++;
+            }
+            if (port == firstPort) {
+                allPortsTried = TRUE;
+            }
+        } else {
+            if (memcmp(&ctAddr, &maxCtAddr, sizeof ctAddr)) {
+                if (entry->key.dl_type == htons(ETH_TYPE_IPV4)) {
+                    ctAddr.ipv4_aligned = htonl(
+                        ntohl(ctAddr.ipv4_aligned) + 1);
+                } else {
+                    // XXX: IPv6 not supported
+                    return FALSE;
+                }
+            } else {
+                ctAddr = entry->natInfo.minAddr;
+            }
+            if (!memcmp(&ctAddr, &firstAddr, sizeof ctAddr)) {
+                if (!originalPortsTried) {
+                    originalPortsTried = TRUE;
+                    ctAddr = entry->natInfo.minAddr;
+                    minPort = MIN_NAT_EPHEMERAL_PORT;
+                    maxPort = MAX_NAT_EPHEMERAL_PORT;
+                } else {
+                    break;
+                }
+            }
+            firstPort = minPort;
+            port = firstPort;
+            allPortsTried = FALSE;
+        }
+    }
+    return FALSE;
+}
+
+/*
+ *----------------------------------------------------------------------------
+ * OvsNatLookup
+ *     Look up a NAT entry with the given key in the NAT table.
+ *     If reverse is TRUE, look up a NAT entry with the given value instead.
+ *----------------------------------------------------------------------------
+ */
+POVS_NAT_ENTRY
+OvsNatLookup(const OVS_CT_KEY *ctKey, BOOLEAN reverse)
+{
+    PLIST_ENTRY link;
+    POVS_NAT_ENTRY entry;
+
+    LIST_FORALL(OvsNatGetBucket(ctKey, reverse), link) {
+        if (reverse) {
+            entry = CONTAINING_RECORD(link, OVS_NAT_ENTRY, reverseLink);
+
+            if (OvsNatKeyAreSame(ctKey, &entry->value)) {
+                return entry;
+            }
+        } else {
+            entry = CONTAINING_RECORD(link, OVS_NAT_ENTRY, link);
+
+            if (OvsNatKeyAreSame(ctKey, &entry->key)) {
+                return entry;
+            }
+        }
+    }
+    return NULL;
+}
+
+/*
+ *----------------------------------------------------------------------------
+ * OvsNatDeleteEntry
+ *     Delete a NAT entry.
+ *----------------------------------------------------------------------------
+ */
+VOID
+OvsNatDeleteEntry(POVS_NAT_ENTRY entry)
+{
+    if (entry == NULL) {
+        return;
+    }
+    RemoveEntryList(&entry->link);
+    RemoveEntryList(&entry->reverseLink);
+    OvsFreeMemoryWithTag(entry, OVS_CT_POOL_TAG);
+}
+
+/*
+ *----------------------------------------------------------------------------
+ * OvsNatDeleteKey
+ *     Delete a NAT entry with the given key.
+ *----------------------------------------------------------------------------
+ */
+VOID
+OvsNatDeleteKey(const OVS_CT_KEY *key)
+{
+    OvsNatDeleteEntry(OvsNatLookup(key, FALSE));
+}
diff --git a/datapath-windows/ovsext/Conntrack-nat.h b/datapath-windows/ovsext/Conntrack-nat.h
new file mode 100644
index 0000000..99f2b67
--- /dev/null
+++ b/datapath-windows/ovsext/Conntrack-nat.h
@@ -0,0 +1,39 @@ 
+#ifndef _CONNTRACK_NAT_H
+#define _CONNTRACK_NAT_H
+
+#include "precomp.h"
+#include "Flow.h"
+#include "Debug.h"
+#include <stddef.h>
+#include "Conntrack.h"
+
+#define NAT_HASH_TABLE_SIZE ((UINT32)1 << 10)
+#define NAT_HASH_TABLE_MASK (NAT_HASH_TABLE_SIZE - 1)
+
+typedef struct OVS_NAT_ENTRY {
+    LIST_ENTRY link;
+    LIST_ENTRY reverseLink;
+    OVS_CT_KEY key;
+    OVS_CT_KEY value;
+    POVS_CT_ENTRY  ctEntry;
+} OVS_NAT_ENTRY, *POVS_NAT_ENTRY;
+
+__inline static BOOLEAN OvsIsForwardNat(UINT16 natAction) {
+    return !!(natAction & (NAT_ACTION_SRC | NAT_ACTION_DST));
+}
+
+NTSTATUS OvsNatInit(POVS_SWITCH_CONTEXT);
+VOID OvsNatFlush(UINT16 zone);
+
+VOID OvsNatAddEntry(OVS_NAT_ENTRY* entry);
+
+VOID OvsNatDeleteEntry(POVS_NAT_ENTRY entry);
+VOID OvsNatDeleteKey(const OVS_CT_KEY *key);
+VOID OvsNatCleanup();
+
+POVS_NAT_ENTRY OvsNatLookup(const OVS_CT_KEY *ctKey, BOOLEAN reverse);
+BOOLEAN OvsNatCtEntry(OVS_CT_ENTRY *ctEntry);
+VOID OvsNatPacket(OvsForwardingContext *ovsFwdCtx, const OVS_CT_ENTRY *entry,
+                  UINT16 natAction, OvsFlowKey *key, BOOLEAN reverse);
+
+#endif
\ No newline at end of file