Message ID | CY1PR0501MB13374EB61E056541ED2F8BA4D43C0@CY1PR0501MB1337.namprd05.prod.outlook.com |
---|---|
State | Superseded |
Headers | show |
Hi Yin,
Thank you for the patches. The patch looks good, I only have a few comments.
Please find my comments inline prefixed with [AK]
Regards,
Anand Kumar
On 3/22/17, 3:11 PM, "ovs-dev-bounces@openvswitch.org on behalf of Yin Lin" <ovs-dev-bounces@openvswitch.org on behalf of linyi@vmware.com> wrote:
Signed-off-by: Yin Lin <linyi@vmware.com>
Issue: #
Change-Id: I6f37360c36525548b343f0016304015fec8aba7d
---
datapath-windows/automake.mk | 2 +
datapath-windows/ovsext/Conntrack-nat.c | 437 ++++++++++++++++++++++++++++++++
datapath-windows/ovsext/Conntrack-nat.h | 39 +++
3 files changed, 478 insertions(+)
create mode 100644 datapath-windows/ovsext/Conntrack-nat.c
create mode 100644 datapath-windows/ovsext/Conntrack-nat.h
diff --git a/datapath-windows/automake.mk b/datapath-windows/automake.mk
index 53983ae..296e785 100644
--- a/datapath-windows/automake.mk
+++ b/datapath-windows/automake.mk
@@ -16,7 +16,9 @@ EXTRA_DIST += \
datapath-windows/ovsext/Conntrack-icmp.c \
datapath-windows/ovsext/Conntrack-other.c \
datapath-windows/ovsext/Conntrack-related.c \
+ datapath-windows/ovsext/Conntrack-nat.c \
datapath-windows/ovsext/Conntrack-tcp.c \
+ datapath-windows/ovsext/Conntrack-nat.h \
datapath-windows/ovsext/Conntrack.c \
datapath-windows/ovsext/Conntrack.h \
datapath-windows/ovsext/Datapath.c \
diff --git a/datapath-windows/ovsext/Conntrack-nat.c b/datapath-windows/ovsext/Conntrack-nat.c
new file mode 100644
index 0000000..4930694
--- /dev/null
+++ b/datapath-windows/ovsext/Conntrack-nat.c
@@ -0,0 +1,437 @@
+#include "Conntrack-nat.h"
+#include "Jhash.h"
+
+PLIST_ENTRY ovsNatTable = NULL;
+PLIST_ENTRY ovsUnNatTable = NULL;
+static PNDIS_RW_LOCK_EX ovsNatLock;
+
+/*
+ *---------------------------------------------------------------------------
+ * OvsHashNatKey
+ * Hash NAT related fields in a Conntrack key.
+ *---------------------------------------------------------------------------
+ */
+static __inline UINT32
+OvsHashNatKey(const OVS_CT_KEY *key)
+{
+ UINT32 hash = 0;
+#define HASH_ADD(field) \
+ hash = OvsJhashBytes(&key->field, sizeof(key->field), hash)
+
+ HASH_ADD(src.addr.ipv4_aligned);
+ HASH_ADD(dst.addr.ipv4_aligned);
+ HASH_ADD(src.port);
+ HASH_ADD(dst.port);
+ HASH_ADD(zone);
+#undef HASH_ADD
+ return hash;
+}
+
+/*
+ *---------------------------------------------------------------------------
+ * OvsNatKeyAreSame
+ * Compare NAT related fields in a Conntrack key.
+ *---------------------------------------------------------------------------
+ */
+static __inline BOOLEAN
+OvsNatKeyAreSame(const OVS_CT_KEY *key1, const OVS_CT_KEY *key2)
+{
+ // XXX: Compare IPv6 key as well
+#define FIELD_COMPARE(field) \
+ if (key1->field != key2->field) return FALSE
+
+ FIELD_COMPARE(src.addr.ipv4_aligned);
+ FIELD_COMPARE(dst.addr.ipv4_aligned);
+ FIELD_COMPARE(src.port);
+ FIELD_COMPARE(dst.port);
+ FIELD_COMPARE(zone);
+ return TRUE;
+#undef FIELD_COMPARE
+}
+
+/*
+ *---------------------------------------------------------------------------
+ * OvsNaGetBucket
+ * Returns the row of NAT table that has the same hash as the given NAT
+ * hash key. If isReverse is TRUE, returns the row of reverse NAT table
+ * instead.
+ *---------------------------------------------------------------------------
+ */
+static __inline PLIST_ENTRY
+OvsNatGetBucket(const OVS_CT_KEY *key, BOOLEAN isReverse)
+{
+ uint32_t hash = OvsHashNatKey(key);
+ if (isReverse) {
+ return &ovsUnNatTable[hash & NAT_HASH_TABLE_MASK];
+ } else {
+ return &ovsNatTable[hash & NAT_HASH_TABLE_MASK];
+ }
+}
+
+/*
+ *---------------------------------------------------------------------------
+ * OvsNatInit
+ * Initialize NAT related resources.
+ *---------------------------------------------------------------------------
+ */
+NTSTATUS OvsNatInit(POVS_SWITCH_CONTEXT context)
+{
+ ASSERT(ovsNatTable == NULL);
+
+ /* Init the sync-lock */
+ ovsNatLock = NdisAllocateRWLock(context->NdisFilterHandle);
+ if (ovsNatLock == NULL) {
+ goto failNoMem;
+ }
+
+ /* Init the Hash Buffer */
+ ovsNatTable = OvsAllocateMemoryWithTag(
+ sizeof(LIST_ENTRY) * NAT_HASH_TABLE_SIZE,
+ OVS_CT_POOL_TAG);
+ if (ovsNatTable == NULL) {
+ goto freeNatLock;
+ }
+
+ ovsUnNatTable = OvsAllocateMemoryWithTag(
+ sizeof(LIST_ENTRY) * NAT_HASH_TABLE_SIZE,
+ OVS_CT_POOL_TAG);
+ if (ovsUnNatTable == NULL) {
+ goto freeNatTable;
+ }
+
+ for (int i = 0; i < NAT_HASH_TABLE_SIZE; i++) {
+ InitializeListHead(&ovsNatTable[i]);
+ InitializeListHead(&ovsUnNatTable[i]);
+ }
+ return STATUS_SUCCESS;
+
+freeNatTable:
+ OvsFreeMemoryWithTag(ovsNatTable, OVS_CT_POOL_TAG);
+freeNatLock:
+ NdisFreeRWLock(ovsNatLock);
+failNoMem:
+ return STATUS_INSUFFICIENT_RESOURCES;
+}
+
+/*
+ *----------------------------------------------------------------------------
+ * OvsNatFlush
+ * Flushes out all NAT entries that match the given zone.
+ *----------------------------------------------------------------------------
+ */
+VOID OvsNatFlush(UINT16 zone)
+{
+ LOCK_STATE_EX lockState;
+ PLIST_ENTRY link, next;
+ NdisAcquireRWLockWrite(ovsNatLock, &lockState, 0);
+ for (int i = 0; i < NAT_HASH_TABLE_SIZE; i++) {
+ LIST_FORALL_SAFE(&ovsNatTable[i], link, next) {
+ POVS_NAT_ENTRY entry =
+ CONTAINING_RECORD(link, OVS_NAT_ENTRY, link);
+ /* zone is a non-zero value */
+ if (!zone || zone == entry->key.zone) {
+ OvsNatDeleteEntry(entry);
+ }
+ }
+ }
+ NdisReleaseRWLock(ovsNatLock, &lockState);
+}
+
+/*
+ *----------------------------------------------------------------------------
+ * OvsNatCleanup
+ * Releases all NAT related resources.
+ *----------------------------------------------------------------------------
+ */
+VOID OvsNatCleanup()
+{
+ if (ovsNatTable == NULL) return;
+ OvsFreeMemoryWithTag(ovsNatTable, OVS_CT_POOL_TAG);
+ OvsFreeMemoryWithTag(ovsUnNatTable, OVS_CT_POOL_TAG);
+ NdisFreeRWLock(ovsNatLock);
+ ovsNatTable = NULL;
+ ovsUnNatTable = NULL;
+ ovsNatLock = NULL;
+}
+
+/*
+ *----------------------------------------------------------------------------
+ * OvsNatPacket
+ * Performs NAT operation on the packet by replacing the source/destinaton
+ * address/port based on natAction. If reverse is TRUE, perform unNAT
+ * instead.
+ *----------------------------------------------------------------------------
+ */
+VOID
+OvsNatPacket(OvsForwardingContext *ovsFwdCtx,
+ const OVS_CT_ENTRY *entry,
+ UINT16 natAction,
+ OvsFlowKey *key,
+ BOOLEAN reverse)
+{
+ UINT32 natFlag;
+ const struct ct_endpoint* endpoint;
+ /* When it is NAT, only entry->rev_key contains NATTED address;
+ When it is unNAT, only entry->key contains the UNNATTED address;*/
+ const OVS_CT_KEY *ctKey = reverse ? &entry->key : &entry->rev_key;
+ BOOLEAN isSrcNat;
+
+ if (!(natAction & (NAT_ACTION_SRC | NAT_ACTION_DST))) {
+ return;
+ }
+ isSrcNat = (((natAction & NAT_ACTION_SRC) && !reverse) ||
+ ((natAction & NAT_ACTION_DST) && reverse));
+
+ if (isSrcNat) {
+ /* Flag is set to SNAT for SNAT case and the reverse DNAT case */
+ natFlag = OVS_CS_F_SRC_NAT;
+ /* Note that ctKey is the key in the other direction, so
+ endpoint has to be reverted, i.e. ctKey->dst for SNAT
+ and ctKey->src for DNAT */
+ endpoint = &ctKey->dst;
+ } else {
+ natFlag = OVS_CS_F_DST_NAT;
+ endpoint = &ctKey->src;
+ }
+ key->ct.state |= natFlag;
+ if (ctKey->dl_type == htons(ETH_TYPE_IPV4)) {
+ OvsUpdateAddressAndPort(ovsFwdCtx,
+ endpoint->addr.ipv4_aligned,
+ endpoint->port, isSrcNat);
+ if (isSrcNat) {
+ key->ipKey.nwSrc = endpoint->addr.ipv4_aligned;
+ } else {
+ key->ipKey.nwDst = endpoint->addr.ipv4_aligned;
+ }
+ } else if (ctKey->dl_type == htons(ETH_TYPE_IPV6)){
+ // XXX: IPv6 packet not supported yet.
+ return;
+ }
+ if (natAction & (NAT_ACTION_SRC_PORT | NAT_ACTION_DST_PORT)) {
+ if (isSrcNat) {
+ if (key->ipKey.l4.tpSrc != 0) {
+ key->ipKey.l4.tpSrc = endpoint->port;
+ }
+ } else {
+ if (key->ipKey.l4.tpDst != 0) {
+ key->ipKey.l4.tpDst = endpoint->port;
+ }
+ }
+ }
+}
+
+
+/*
+ *----------------------------------------------------------------------------
+ * OvsNatHashRange
+ * Compute hash for a range of addresses specified in natInfo.
+ *----------------------------------------------------------------------------
+ */
+static UINT32 OvsNatHashRange(const OVS_CT_ENTRY *entry, UINT32 basis)
+{
+ UINT32 hash = basis;
+#define HASH_ADD(field) \
+ hash = OvsJhashBytes(&field, sizeof(field), hash)
+
+ HASH_ADD(entry->natInfo.minAddr);
+ HASH_ADD(entry->natInfo.maxAddr);
+ HASH_ADD(entry->key.dl_type);
+ HASH_ADD(entry->key.nw_proto);
+ HASH_ADD(entry->key.zone);
+#undef HASH_ADD
+ return hash;
+}
+
+/*
+ *----------------------------------------------------------------------------
+ * OvsNatAddEntry
+ * Add an entry to the NAT table. Also updates the reverse NAT lookup
+ * table.
+ *----------------------------------------------------------------------------
+ */
+VOID
+OvsNatAddEntry(OVS_NAT_ENTRY* entry)
+{
+ InsertHeadList(OvsNatGetBucket(&entry->key, FALSE),
+ &entry->link);
+ InsertHeadList(OvsNatGetBucket(&entry->value, TRUE),
+ &entry->reverseLink);
+}
+
+/*
+ *----------------------------------------------------------------------------
+ * OvsNatCtEntry
+ * Update an Conntrack entry with NAT information. Translated address and
+ * port will be generated and write back to the conntrack entry as a
+ * result.
+ *----------------------------------------------------------------------------
+ */
+BOOLEAN
+OvsNatCtEntry(OVS_CT_ENTRY *entry)
+{
+ const uint16_t MIN_NAT_EPHEMERAL_PORT = 1024;
+ const uint16_t MAX_NAT_EPHEMERAL_PORT = 65535;
+
+ uint16_t minPort;
+ uint16_t maxPort;
+ uint16_t firstPort;
+
+ uint32_t hash = OvsNatHashRange(entry, 0);
+
+ if ((entry->natInfo.natAction & NAT_ACTION_SRC) &&
+ (!(entry->natInfo.natAction & NAT_ACTION_SRC_PORT))) {
+ firstPort = minPort = maxPort = ntohs(entry->key.src.port);
+ } else if ((entry->natInfo.natAction & NAT_ACTION_DST) &&
+ (!(entry->natInfo.natAction & NAT_ACTION_DST_PORT))) {
+ firstPort = minPort = maxPort = ntohs(entry->key.dst.port);
+ } else {
+ uint16_t portDelta = entry->natInfo.maxPort - entry->natInfo.minPort;
+ uint16_t portIndex = (uint16_t) hash % (portDelta + 1);
+ firstPort = entry->natInfo.minPort + portIndex;
+ minPort = entry->natInfo.minPort;
+ maxPort = entry->natInfo.maxPort;
+ }
+
+ uint32_t addrDelta = 0;
+ uint32_t addrIndex;
+ struct ct_addr ctAddr, maxCtAddr;
+ memset(&ctAddr, 0, sizeof ctAddr);
+ memset(&maxCtAddr, 0, sizeof maxCtAddr);
+ maxCtAddr = entry->natInfo.maxAddr;
+
+ if (entry->key.dl_type == htons(ETH_TYPE_IPV4)) {
+ addrDelta = ntohl(entry->natInfo.maxAddr.ipv4_aligned) -
+ ntohl(entry->natInfo.minAddr.ipv4_aligned);
+ addrIndex = hash % (addrDelta + 1);
+ ctAddr.ipv4_aligned = htonl(
+ ntohl(entry->natInfo.minAddr.ipv4_aligned) + addrIndex);
+ } else {
+ // XXX: IPv6 not supported
+ return FALSE;
+ }
+
+ uint16_t port = firstPort;
+ BOOLEAN allPortsTried = FALSE;
+ BOOLEAN originalPortsTried = FALSE;
+ struct ct_addr firstAddr = ctAddr;
+ for (;;) {
+ if (entry->natInfo.natAction & NAT_ACTION_SRC) {
+ entry->rev_key.dst.addr = ctAddr;
+ entry->rev_key.dst.port = htons(port);
+ } else {
+ entry->rev_key.src.addr = ctAddr;
+ entry->rev_key.src.port = htons(port);
+ }
+
+ OVS_NAT_ENTRY *natEntry = OvsNatLookup(&entry->rev_key, TRUE);
+
+ if (!natEntry) {
+ natEntry = OvsAllocateMemoryWithTag(sizeof(*natEntry),
+ OVS_CT_POOL_TAG);
+ memcpy(&natEntry->key, &entry->key,
+ sizeof natEntry->key);
+ memcpy(&natEntry->value, &entry->rev_key,
+ sizeof natEntry->value);
+ natEntry->ctEntry = entry;
+ OvsNatAddEntry(natEntry);
+ return TRUE;
+ } else if (!allPortsTried) {
+ if (minPort == maxPort) {
+ allPortsTried = TRUE;
+ } else if (port == maxPort) {
+ port = minPort;
+ } else {
+ port++;
+ }
+ if (port == firstPort) {
+ allPortsTried = TRUE;
+ }
+ } else {
+ if (memcmp(&ctAddr, &maxCtAddr, sizeof ctAddr)) {
+ if (entry->key.dl_type == htons(ETH_TYPE_IPV4)) {
+ ctAddr.ipv4_aligned = htonl(
+ ntohl(ctAddr.ipv4_aligned) + 1);
+ } else {
+ // XXX: IPv6 not supported
+ return FALSE;
+ }
+ } else {
+ ctAddr = entry->natInfo.minAddr;
+ }
+ if (!memcmp(&ctAddr, &firstAddr, sizeof ctAddr)) {
+ if (!originalPortsTried) {
+ originalPortsTried = TRUE;
+ ctAddr = entry->natInfo.minAddr;
+ minPort = MIN_NAT_EPHEMERAL_PORT;
+ maxPort = MAX_NAT_EPHEMERAL_PORT;
+ } else {
+ break;
+ }
+ }
+ firstPort = minPort;
+ port = firstPort;
+ allPortsTried = FALSE;
+ }
+ }
+ return FALSE;
+}
+
+/*
+ *----------------------------------------------------------------------------
+ * OvsNatLookup
+ * Look up a NAT entry with the given key in the NAT table.
+ * If reverse is TRUE, look up a NAT entry with the given value instead.
+ *----------------------------------------------------------------------------
+ */
+POVS_NAT_ENTRY
+OvsNatLookup(const OVS_CT_KEY *ctKey, BOOLEAN reverse)
+{
+ PLIST_ENTRY link;
+ POVS_NAT_ENTRY entry;
+
+ LIST_FORALL(OvsNatGetBucket(ctKey, reverse), link) {
+ if (reverse) {
+ entry = CONTAINING_RECORD(link, OVS_NAT_ENTRY, reverseLink);
+
+ if (OvsNatKeyAreSame(ctKey, &entry->value)) {
+ return entry;
+ }
+ } else {
+ entry = CONTAINING_RECORD(link, OVS_NAT_ENTRY, link);
+
+ if (OvsNatKeyAreSame(ctKey, &entry->key)) {
+ return entry;
+ }
+ }
+ }
+ return NULL;
+}
+
+/*
+ *----------------------------------------------------------------------------
+ * OvsNatDeleteEntry
+ * Delete a NAT entry.
+ *----------------------------------------------------------------------------
+ */
+VOID
+OvsNatDeleteEntry(POVS_NAT_ENTRY entry)
+{
+ if (entry == NULL) {
+ return;
+ }
+ RemoveEntryList(&entry->link);
+ RemoveEntryList(&entry->reverseLink);
+ OvsFreeMemoryWithTag(entry, OVS_CT_POOL_TAG);
+}
+
+/*
+ *----------------------------------------------------------------------------
+ * OvsNatDeleteKey
+ * Delete a NAT entry with the given key.
+ *----------------------------------------------------------------------------
+ */
+VOID
+OvsNatDeleteKey(const OVS_CT_KEY *key)
+{
+ OvsNatDeleteEntry(OvsNatLookup(key, FALSE));
+}
diff --git a/datapath-windows/ovsext/Conntrack-nat.h b/datapath-windows/ovsext/Conntrack-nat.h
new file mode 100644
index 0000000..99f2b67
--- /dev/null
+++ b/datapath-windows/ovsext/Conntrack-nat.h
@@ -0,0 +1,39 @@
+#ifndef _CONNTRACK_NAT_H
+#define _CONNTRACK_NAT_H
+
+#include "precomp.h"
+#include "Flow.h"
+#include "Debug.h"
+#include <stddef.h>
+#include "Conntrack.h"
+
+#define NAT_HASH_TABLE_SIZE ((UINT32)1 << 10)
+#define NAT_HASH_TABLE_MASK (NAT_HASH_TABLE_SIZE - 1)
+
+typedef struct OVS_NAT_ENTRY {
+ LIST_ENTRY link;
+ LIST_ENTRY reverseLink;
+ OVS_CT_KEY key;
+ OVS_CT_KEY value;
+ POVS_CT_ENTRY ctEntry;
+} OVS_NAT_ENTRY, *POVS_NAT_ENTRY;
+
+__inline static BOOLEAN OvsIsForwardNat(UINT16 natAction) {
[AK] – Is there a need for double inversion here?
+ return !!(natAction & (NAT_ACTION_SRC | NAT_ACTION_DST));
+}
+
+NTSTATUS OvsNatInit(POVS_SWITCH_CONTEXT);
+VOID OvsNatFlush(UINT16 zone);
+
+VOID OvsNatAddEntry(OVS_NAT_ENTRY* entry);
+
+VOID OvsNatDeleteEntry(POVS_NAT_ENTRY entry);
+VOID OvsNatDeleteKey(const OVS_CT_KEY *key);
+VOID OvsNatCleanup();
+
+POVS_NAT_ENTRY OvsNatLookup(const OVS_CT_KEY *ctKey, BOOLEAN reverse);
+BOOLEAN OvsNatCtEntry(OVS_CT_ENTRY *ctEntry);
+VOID OvsNatPacket(OvsForwardingContext *ovsFwdCtx, const OVS_CT_ENTRY *entry,
+ UINT16 natAction, OvsFlowKey *key, BOOLEAN reverse);
+
+#endif
[AK] – new line should be added to the end of file.
\ No newline at end of file
--
2.10.2.windows.1
Hi Yin, Thanks for sending over the patch. While it looks good for the most part, there are certain indentations that need fixing. I will send those out separately. I see that you have allocated - ovsNatLock and have added in stubs for flush and cleanup similar to Conntrack.c. However, I don’t see you taking out the lock for accessing the NAT tables. How are you verifying thread-safety here? Thanks, Sairam On 3/22/17, 3:11 PM, "ovs-dev-bounces@openvswitch.org on behalf of Yin Lin" <ovs-dev-bounces@openvswitch.org on behalf of linyi@vmware.com> wrote: >Signed-off-by: Yin Lin <linyi@vmware.com> > >Issue: # >Change-Id: I6f37360c36525548b343f0016304015fec8aba7d >--- > datapath-windows/automake.mk | 2 + > datapath-windows/ovsext/Conntrack-nat.c | 437 ++++++++++++++++++++++++++++++++ > datapath-windows/ovsext/Conntrack-nat.h | 39 +++ > 3 files changed, 478 insertions(+) > create mode 100644 datapath-windows/ovsext/Conntrack-nat.c > create mode 100644 datapath-windows/ovsext/Conntrack-nat.h > >diff --git a/datapath-windows/automake.mk b/datapath-windows/automake.mk >index 53983ae..296e785 100644 >--- a/datapath-windows/automake.mk >+++ b/datapath-windows/automake.mk >@@ -16,7 +16,9 @@ EXTRA_DIST += \ > datapath-windows/ovsext/Conntrack-icmp.c \ > datapath-windows/ovsext/Conntrack-other.c \ > datapath-windows/ovsext/Conntrack-related.c \ >+ datapath-windows/ovsext/Conntrack-nat.c \ > datapath-windows/ovsext/Conntrack-tcp.c \ >+ datapath-windows/ovsext/Conntrack-nat.h \ > datapath-windows/ovsext/Conntrack.c \ > datapath-windows/ovsext/Conntrack.h \ > datapath-windows/ovsext/Datapath.c \ >diff --git a/datapath-windows/ovsext/Conntrack-nat.c b/datapath-windows/ovsext/Conntrack-nat.c >new file mode 100644 >index 0000000..4930694 >--- /dev/null >+++ b/datapath-windows/ovsext/Conntrack-nat.c >@@ -0,0 +1,437 @@ >+#include "Conntrack-nat.h" >+#include "Jhash.h" >+ >+PLIST_ENTRY ovsNatTable = NULL; >+PLIST_ENTRY ovsUnNatTable = NULL; >+static PNDIS_RW_LOCK_EX ovsNatLock; >+ >+/* >+ *--------------------------------------------------------------------------- >+ * OvsHashNatKey >+ * Hash NAT related fields in a Conntrack key. >+ *--------------------------------------------------------------------------- >+ */ >+static __inline UINT32 >+OvsHashNatKey(const OVS_CT_KEY *key) >+{ >+ UINT32 hash = 0; >+#define HASH_ADD(field) \ >+ hash = OvsJhashBytes(&key->field, sizeof(key->field), hash) >+ >+ HASH_ADD(src.addr.ipv4_aligned); >+ HASH_ADD(dst.addr.ipv4_aligned); >+ HASH_ADD(src.port); >+ HASH_ADD(dst.port); >+ HASH_ADD(zone); >+#undef HASH_ADD >+ return hash; >+} >+ >+/* >+ *--------------------------------------------------------------------------- >+ * OvsNatKeyAreSame >+ * Compare NAT related fields in a Conntrack key. >+ *--------------------------------------------------------------------------- >+ */ >+static __inline BOOLEAN >+OvsNatKeyAreSame(const OVS_CT_KEY *key1, const OVS_CT_KEY *key2) >+{ >+ // XXX: Compare IPv6 key as well >+#define FIELD_COMPARE(field) \ >+ if (key1->field != key2->field) return FALSE >+ >+ FIELD_COMPARE(src.addr.ipv4_aligned); >+ FIELD_COMPARE(dst.addr.ipv4_aligned); >+ FIELD_COMPARE(src.port); >+ FIELD_COMPARE(dst.port); >+ FIELD_COMPARE(zone); >+ return TRUE; >+#undef FIELD_COMPARE >+} >+ >+/* >+ *--------------------------------------------------------------------------- >+ * OvsNaGetBucket >+ * Returns the row of NAT table that has the same hash as the given NAT >+ * hash key. If isReverse is TRUE, returns the row of reverse NAT table >+ * instead. >+ *--------------------------------------------------------------------------- >+ */ >+static __inline PLIST_ENTRY >+OvsNatGetBucket(const OVS_CT_KEY *key, BOOLEAN isReverse) >+{ >+ uint32_t hash = OvsHashNatKey(key); >+ if (isReverse) { >+ return &ovsUnNatTable[hash & NAT_HASH_TABLE_MASK]; >+ } else { >+ return &ovsNatTable[hash & NAT_HASH_TABLE_MASK]; >+ } >+} >+ >+/* >+ *--------------------------------------------------------------------------- >+ * OvsNatInit >+ * Initialize NAT related resources. >+ *--------------------------------------------------------------------------- >+ */ >+NTSTATUS OvsNatInit(POVS_SWITCH_CONTEXT context) >+{ >+ ASSERT(ovsNatTable == NULL); >+ >+ /* Init the sync-lock */ >+ ovsNatLock = NdisAllocateRWLock(context->NdisFilterHandle); >+ if (ovsNatLock == NULL) { >+ goto failNoMem; >+ } >+ >+ /* Init the Hash Buffer */ >+ ovsNatTable = OvsAllocateMemoryWithTag( >+ sizeof(LIST_ENTRY) * NAT_HASH_TABLE_SIZE, >+ OVS_CT_POOL_TAG); >+ if (ovsNatTable == NULL) { >+ goto freeNatLock; >+ } >+ >+ ovsUnNatTable = OvsAllocateMemoryWithTag( >+ sizeof(LIST_ENTRY) * NAT_HASH_TABLE_SIZE, >+ OVS_CT_POOL_TAG); >+ if (ovsUnNatTable == NULL) { >+ goto freeNatTable; >+ } >+ >+ for (int i = 0; i < NAT_HASH_TABLE_SIZE; i++) { >+ InitializeListHead(&ovsNatTable[i]); >+ InitializeListHead(&ovsUnNatTable[i]); >+ } >+ return STATUS_SUCCESS; >+ >+freeNatTable: >+ OvsFreeMemoryWithTag(ovsNatTable, OVS_CT_POOL_TAG); >+freeNatLock: >+ NdisFreeRWLock(ovsNatLock); >+failNoMem: >+ return STATUS_INSUFFICIENT_RESOURCES; >+} >+ >+/* >+ *---------------------------------------------------------------------------- >+ * OvsNatFlush >+ * Flushes out all NAT entries that match the given zone. >+ *---------------------------------------------------------------------------- >+ */ >+VOID OvsNatFlush(UINT16 zone) >+{ >+ LOCK_STATE_EX lockState; >+ PLIST_ENTRY link, next; >+ NdisAcquireRWLockWrite(ovsNatLock, &lockState, 0); >+ for (int i = 0; i < NAT_HASH_TABLE_SIZE; i++) { >+ LIST_FORALL_SAFE(&ovsNatTable[i], link, next) { >+ POVS_NAT_ENTRY entry = >+ CONTAINING_RECORD(link, OVS_NAT_ENTRY, link); >+ /* zone is a non-zero value */ >+ if (!zone || zone == entry->key.zone) { >+ OvsNatDeleteEntry(entry); >+ } >+ } >+ } >+ NdisReleaseRWLock(ovsNatLock, &lockState); >+} >+ >+/* >+ *---------------------------------------------------------------------------- >+ * OvsNatCleanup >+ * Releases all NAT related resources. >+ *---------------------------------------------------------------------------- >+ */ >+VOID OvsNatCleanup() >+{ >+ if (ovsNatTable == NULL) return; >+ OvsFreeMemoryWithTag(ovsNatTable, OVS_CT_POOL_TAG); >+ OvsFreeMemoryWithTag(ovsUnNatTable, OVS_CT_POOL_TAG); >+ NdisFreeRWLock(ovsNatLock); >+ ovsNatTable = NULL; >+ ovsUnNatTable = NULL; >+ ovsNatLock = NULL; >+} >+ >+/* >+ *---------------------------------------------------------------------------- >+ * OvsNatPacket >+ * Performs NAT operation on the packet by replacing the source/destinaton >+ * address/port based on natAction. If reverse is TRUE, perform unNAT >+ * instead. >+ *---------------------------------------------------------------------------- >+ */ >+VOID >+OvsNatPacket(OvsForwardingContext *ovsFwdCtx, >+ const OVS_CT_ENTRY *entry, >+ UINT16 natAction, >+ OvsFlowKey *key, >+ BOOLEAN reverse) >+{ >+ UINT32 natFlag; >+ const struct ct_endpoint* endpoint; >+ /* When it is NAT, only entry->rev_key contains NATTED address; >+ When it is unNAT, only entry->key contains the UNNATTED address;*/ >+ const OVS_CT_KEY *ctKey = reverse ? &entry->key : &entry->rev_key; >+ BOOLEAN isSrcNat; >+ >+ if (!(natAction & (NAT_ACTION_SRC | NAT_ACTION_DST))) { >+ return; >+ } >+ isSrcNat = (((natAction & NAT_ACTION_SRC) && !reverse) || >+ ((natAction & NAT_ACTION_DST) && reverse)); >+ >+ if (isSrcNat) { >+ /* Flag is set to SNAT for SNAT case and the reverse DNAT case */ >+ natFlag = OVS_CS_F_SRC_NAT; >+ /* Note that ctKey is the key in the other direction, so >+ endpoint has to be reverted, i.e. ctKey->dst for SNAT >+ and ctKey->src for DNAT */ >+ endpoint = &ctKey->dst; >+ } else { >+ natFlag = OVS_CS_F_DST_NAT; >+ endpoint = &ctKey->src; >+ } >+ key->ct.state |= natFlag; >+ if (ctKey->dl_type == htons(ETH_TYPE_IPV4)) { >+ OvsUpdateAddressAndPort(ovsFwdCtx, >+ endpoint->addr.ipv4_aligned, >+ endpoint->port, isSrcNat); >+ if (isSrcNat) { >+ key->ipKey.nwSrc = endpoint->addr.ipv4_aligned; >+ } else { >+ key->ipKey.nwDst = endpoint->addr.ipv4_aligned; >+ } >+ } else if (ctKey->dl_type == htons(ETH_TYPE_IPV6)){ >+ // XXX: IPv6 packet not supported yet. >+ return; >+ } >+ if (natAction & (NAT_ACTION_SRC_PORT | NAT_ACTION_DST_PORT)) { >+ if (isSrcNat) { >+ if (key->ipKey.l4.tpSrc != 0) { >+ key->ipKey.l4.tpSrc = endpoint->port; >+ } >+ } else { >+ if (key->ipKey.l4.tpDst != 0) { >+ key->ipKey.l4.tpDst = endpoint->port; >+ } >+ } >+ } >+} >+ >+ >+/* >+ *---------------------------------------------------------------------------- >+ * OvsNatHashRange >+ * Compute hash for a range of addresses specified in natInfo. >+ *---------------------------------------------------------------------------- >+ */ >+static UINT32 OvsNatHashRange(const OVS_CT_ENTRY *entry, UINT32 basis) >+{ >+ UINT32 hash = basis; >+#define HASH_ADD(field) \ >+ hash = OvsJhashBytes(&field, sizeof(field), hash) >+ >+ HASH_ADD(entry->natInfo.minAddr); >+ HASH_ADD(entry->natInfo.maxAddr); >+ HASH_ADD(entry->key.dl_type); >+ HASH_ADD(entry->key.nw_proto); >+ HASH_ADD(entry->key.zone); >+#undef HASH_ADD >+ return hash; >+} >+ >+/* >+ *---------------------------------------------------------------------------- >+ * OvsNatAddEntry >+ * Add an entry to the NAT table. Also updates the reverse NAT lookup >+ * table. >+ *---------------------------------------------------------------------------- >+ */ >+VOID >+OvsNatAddEntry(OVS_NAT_ENTRY* entry) >+{ >+ InsertHeadList(OvsNatGetBucket(&entry->key, FALSE), >+ &entry->link); >+ InsertHeadList(OvsNatGetBucket(&entry->value, TRUE), >+ &entry->reverseLink); >+} >+ >+/* >+ *---------------------------------------------------------------------------- >+ * OvsNatCtEntry >+ * Update an Conntrack entry with NAT information. Translated address and >+ * port will be generated and write back to the conntrack entry as a >+ * result. >+ *---------------------------------------------------------------------------- >+ */ >+BOOLEAN >+OvsNatCtEntry(OVS_CT_ENTRY *entry) >+{ >+ const uint16_t MIN_NAT_EPHEMERAL_PORT = 1024; >+ const uint16_t MAX_NAT_EPHEMERAL_PORT = 65535; >+ >+ uint16_t minPort; >+ uint16_t maxPort; >+ uint16_t firstPort; >+ >+ uint32_t hash = OvsNatHashRange(entry, 0); >+ >+ if ((entry->natInfo.natAction & NAT_ACTION_SRC) && >+ (!(entry->natInfo.natAction & NAT_ACTION_SRC_PORT))) { >+ firstPort = minPort = maxPort = ntohs(entry->key.src.port); >+ } else if ((entry->natInfo.natAction & NAT_ACTION_DST) && >+ (!(entry->natInfo.natAction & NAT_ACTION_DST_PORT))) { >+ firstPort = minPort = maxPort = ntohs(entry->key.dst.port); >+ } else { >+ uint16_t portDelta = entry->natInfo.maxPort - entry->natInfo.minPort; >+ uint16_t portIndex = (uint16_t) hash % (portDelta + 1); >+ firstPort = entry->natInfo.minPort + portIndex; >+ minPort = entry->natInfo.minPort; >+ maxPort = entry->natInfo.maxPort; >+ } >+ >+ uint32_t addrDelta = 0; >+ uint32_t addrIndex; >+ struct ct_addr ctAddr, maxCtAddr; >+ memset(&ctAddr, 0, sizeof ctAddr); >+ memset(&maxCtAddr, 0, sizeof maxCtAddr); >+ maxCtAddr = entry->natInfo.maxAddr; >+ >+ if (entry->key.dl_type == htons(ETH_TYPE_IPV4)) { >+ addrDelta = ntohl(entry->natInfo.maxAddr.ipv4_aligned) - >+ ntohl(entry->natInfo.minAddr.ipv4_aligned); >+ addrIndex = hash % (addrDelta + 1); >+ ctAddr.ipv4_aligned = htonl( >+ ntohl(entry->natInfo.minAddr.ipv4_aligned) + addrIndex); >+ } else { >+ // XXX: IPv6 not supported >+ return FALSE; >+ } >+ >+ uint16_t port = firstPort; >+ BOOLEAN allPortsTried = FALSE; >+ BOOLEAN originalPortsTried = FALSE; >+ struct ct_addr firstAddr = ctAddr; >+ for (;;) { >+ if (entry->natInfo.natAction & NAT_ACTION_SRC) { >+ entry->rev_key.dst.addr = ctAddr; >+ entry->rev_key.dst.port = htons(port); >+ } else { >+ entry->rev_key.src.addr = ctAddr; >+ entry->rev_key.src.port = htons(port); >+ } >+ >+ OVS_NAT_ENTRY *natEntry = OvsNatLookup(&entry->rev_key, TRUE); >+ >+ if (!natEntry) { >+ natEntry = OvsAllocateMemoryWithTag(sizeof(*natEntry), >+ OVS_CT_POOL_TAG); >+ memcpy(&natEntry->key, &entry->key, >+ sizeof natEntry->key); >+ memcpy(&natEntry->value, &entry->rev_key, >+ sizeof natEntry->value); >+ natEntry->ctEntry = entry; >+ OvsNatAddEntry(natEntry); >+ return TRUE; >+ } else if (!allPortsTried) { >+ if (minPort == maxPort) { >+ allPortsTried = TRUE; >+ } else if (port == maxPort) { >+ port = minPort; >+ } else { >+ port++; >+ } >+ if (port == firstPort) { >+ allPortsTried = TRUE; >+ } >+ } else { >+ if (memcmp(&ctAddr, &maxCtAddr, sizeof ctAddr)) { >+ if (entry->key.dl_type == htons(ETH_TYPE_IPV4)) { >+ ctAddr.ipv4_aligned = htonl( >+ ntohl(ctAddr.ipv4_aligned) + 1); >+ } else { >+ // XXX: IPv6 not supported >+ return FALSE; >+ } >+ } else { >+ ctAddr = entry->natInfo.minAddr; >+ } >+ if (!memcmp(&ctAddr, &firstAddr, sizeof ctAddr)) { >+ if (!originalPortsTried) { >+ originalPortsTried = TRUE; >+ ctAddr = entry->natInfo.minAddr; >+ minPort = MIN_NAT_EPHEMERAL_PORT; >+ maxPort = MAX_NAT_EPHEMERAL_PORT; >+ } else { >+ break; >+ } >+ } >+ firstPort = minPort; >+ port = firstPort; >+ allPortsTried = FALSE; >+ } >+ } >+ return FALSE; >+} >+ >+/* >+ *---------------------------------------------------------------------------- >+ * OvsNatLookup >+ * Look up a NAT entry with the given key in the NAT table. >+ * If reverse is TRUE, look up a NAT entry with the given value instead. >+ *---------------------------------------------------------------------------- >+ */ >+POVS_NAT_ENTRY >+OvsNatLookup(const OVS_CT_KEY *ctKey, BOOLEAN reverse) >+{ >+ PLIST_ENTRY link; >+ POVS_NAT_ENTRY entry; >+ >+ LIST_FORALL(OvsNatGetBucket(ctKey, reverse), link) { >+ if (reverse) { >+ entry = CONTAINING_RECORD(link, OVS_NAT_ENTRY, reverseLink); >+ >+ if (OvsNatKeyAreSame(ctKey, &entry->value)) { >+ return entry; >+ } >+ } else { >+ entry = CONTAINING_RECORD(link, OVS_NAT_ENTRY, link); >+ >+ if (OvsNatKeyAreSame(ctKey, &entry->key)) { >+ return entry; >+ } >+ } >+ } >+ return NULL; >+} >+ >+/* >+ *---------------------------------------------------------------------------- >+ * OvsNatDeleteEntry >+ * Delete a NAT entry. >+ *---------------------------------------------------------------------------- >+ */ >+VOID >+OvsNatDeleteEntry(POVS_NAT_ENTRY entry) >+{ >+ if (entry == NULL) { >+ return; >+ } >+ RemoveEntryList(&entry->link); >+ RemoveEntryList(&entry->reverseLink); >+ OvsFreeMemoryWithTag(entry, OVS_CT_POOL_TAG); >+} >+ >+/* >+ *---------------------------------------------------------------------------- >+ * OvsNatDeleteKey >+ * Delete a NAT entry with the given key. >+ *---------------------------------------------------------------------------- >+ */ >+VOID >+OvsNatDeleteKey(const OVS_CT_KEY *key) >+{ >+ OvsNatDeleteEntry(OvsNatLookup(key, FALSE)); >+} >diff --git a/datapath-windows/ovsext/Conntrack-nat.h b/datapath-windows/ovsext/Conntrack-nat.h >new file mode 100644 >index 0000000..99f2b67 >--- /dev/null >+++ b/datapath-windows/ovsext/Conntrack-nat.h >@@ -0,0 +1,39 @@ >+#ifndef _CONNTRACK_NAT_H >+#define _CONNTRACK_NAT_H >+ >+#include "precomp.h" >+#include "Flow.h" >+#include "Debug.h" >+#include <stddef.h> >+#include "Conntrack.h" >+ >+#define NAT_HASH_TABLE_SIZE ((UINT32)1 << 10) >+#define NAT_HASH_TABLE_MASK (NAT_HASH_TABLE_SIZE - 1) >+ >+typedef struct OVS_NAT_ENTRY { >+ LIST_ENTRY link; >+ LIST_ENTRY reverseLink; >+ OVS_CT_KEY key; >+ OVS_CT_KEY value; >+ POVS_CT_ENTRY ctEntry; >+} OVS_NAT_ENTRY, *POVS_NAT_ENTRY; >+ >+__inline static BOOLEAN OvsIsForwardNat(UINT16 natAction) { >+ return !!(natAction & (NAT_ACTION_SRC | NAT_ACTION_DST)); >+} >+ >+NTSTATUS OvsNatInit(POVS_SWITCH_CONTEXT); >+VOID OvsNatFlush(UINT16 zone); >+ >+VOID OvsNatAddEntry(OVS_NAT_ENTRY* entry); >+ >+VOID OvsNatDeleteEntry(POVS_NAT_ENTRY entry); >+VOID OvsNatDeleteKey(const OVS_CT_KEY *key); >+VOID OvsNatCleanup(); >+ >+POVS_NAT_ENTRY OvsNatLookup(const OVS_CT_KEY *ctKey, BOOLEAN reverse); >+BOOLEAN OvsNatCtEntry(OVS_CT_ENTRY *ctEntry); >+VOID OvsNatPacket(OvsForwardingContext *ovsFwdCtx, const OVS_CT_ENTRY *entry, >+ UINT16 natAction, OvsFlowKey *key, BOOLEAN reverse); >+ >+#endif >\ No newline at end of file >-- >2.10.2.windows.1 >
Hi Sai, You are right. Locking is a little bit tricky here than in Conntrack.c because we cannot grab the lock during the entire Conntrack transaction. I will have to address thread safety in another patch. Best regards, Yin Lin On Thu, Mar 23, 2017 at 11:58 PM, Sairam Venugopal <vsairam@vmware.com> wrote: > Hi Yin, > > Thanks for sending over the patch. While it looks good for the most part, > there are certain indentations that need fixing. I will send those out > separately. > > I see that you have allocated - ovsNatLock and have added in stubs for > flush and cleanup similar to Conntrack.c. > > However, I don’t see you taking out the lock for accessing the NAT tables. > How are you verifying thread-safety here? > > Thanks, > Sairam > > > > > On 3/22/17, 3:11 PM, "ovs-dev-bounces@openvswitch.org on behalf of Yin > Lin" <ovs-dev-bounces@openvswitch.org on behalf of linyi@vmware.com> > wrote: > > >Signed-off-by: Yin Lin <linyi@vmware.com> > > > >Issue: # > >Change-Id: I6f37360c36525548b343f0016304015fec8aba7d > >--- > > datapath-windows/automake.mk | 2 + > > datapath-windows/ovsext/Conntrack-nat.c | 437 > ++++++++++++++++++++++++++++++++ > > datapath-windows/ovsext/Conntrack-nat.h | 39 +++ > > 3 files changed, 478 insertions(+) > > create mode 100644 datapath-windows/ovsext/Conntrack-nat.c > > create mode 100644 datapath-windows/ovsext/Conntrack-nat.h > > > >diff --git a/datapath-windows/automake.mk b/datapath-windows/automake.mk > >index 53983ae..296e785 100644 > >--- a/datapath-windows/automake.mk > >+++ b/datapath-windows/automake.mk > >@@ -16,7 +16,9 @@ EXTRA_DIST += \ > > datapath-windows/ovsext/Conntrack-icmp.c \ > > datapath-windows/ovsext/Conntrack-other.c \ > > datapath-windows/ovsext/Conntrack-related.c \ > >+ datapath-windows/ovsext/Conntrack-nat.c \ > > datapath-windows/ovsext/Conntrack-tcp.c \ > >+ datapath-windows/ovsext/Conntrack-nat.h \ > > datapath-windows/ovsext/Conntrack.c \ > > datapath-windows/ovsext/Conntrack.h \ > > datapath-windows/ovsext/Datapath.c \ > >diff --git a/datapath-windows/ovsext/Conntrack-nat.c > b/datapath-windows/ovsext/Conntrack-nat.c > >new file mode 100644 > >index 0000000..4930694 > >--- /dev/null > >+++ b/datapath-windows/ovsext/Conntrack-nat.c > >@@ -0,0 +1,437 @@ > >+#include "Conntrack-nat.h" > >+#include "Jhash.h" > >+ > >+PLIST_ENTRY ovsNatTable = NULL; > >+PLIST_ENTRY ovsUnNatTable = NULL; > >+static PNDIS_RW_LOCK_EX ovsNatLock; > >+ > >+/* > >+ *----------------------------------------------------------- > ---------------- > >+ * OvsHashNatKey > >+ * Hash NAT related fields in a Conntrack key. > >+ *----------------------------------------------------------- > ---------------- > >+ */ > >+static __inline UINT32 > >+OvsHashNatKey(const OVS_CT_KEY *key) > >+{ > >+ UINT32 hash = 0; > >+#define HASH_ADD(field) \ > >+ hash = OvsJhashBytes(&key->field, sizeof(key->field), hash) > >+ > >+ HASH_ADD(src.addr.ipv4_aligned); > >+ HASH_ADD(dst.addr.ipv4_aligned); > >+ HASH_ADD(src.port); > >+ HASH_ADD(dst.port); > >+ HASH_ADD(zone); > >+#undef HASH_ADD > >+ return hash; > >+} > >+ > >+/* > >+ *----------------------------------------------------------- > ---------------- > >+ * OvsNatKeyAreSame > >+ * Compare NAT related fields in a Conntrack key. > >+ *----------------------------------------------------------- > ---------------- > >+ */ > >+static __inline BOOLEAN > >+OvsNatKeyAreSame(const OVS_CT_KEY *key1, const OVS_CT_KEY *key2) > >+{ > >+ // XXX: Compare IPv6 key as well > >+#define FIELD_COMPARE(field) \ > >+ if (key1->field != key2->field) return FALSE > >+ > >+ FIELD_COMPARE(src.addr.ipv4_aligned); > >+ FIELD_COMPARE(dst.addr.ipv4_aligned); > >+ FIELD_COMPARE(src.port); > >+ FIELD_COMPARE(dst.port); > >+ FIELD_COMPARE(zone); > >+ return TRUE; > >+#undef FIELD_COMPARE > >+} > >+ > >+/* > >+ *----------------------------------------------------------- > ---------------- > >+ * OvsNaGetBucket > >+ * Returns the row of NAT table that has the same hash as the given > NAT > >+ * hash key. If isReverse is TRUE, returns the row of reverse NAT > table > >+ * instead. > >+ *----------------------------------------------------------- > ---------------- > >+ */ > >+static __inline PLIST_ENTRY > >+OvsNatGetBucket(const OVS_CT_KEY *key, BOOLEAN isReverse) > >+{ > >+ uint32_t hash = OvsHashNatKey(key); > >+ if (isReverse) { > >+ return &ovsUnNatTable[hash & NAT_HASH_TABLE_MASK]; > >+ } else { > >+ return &ovsNatTable[hash & NAT_HASH_TABLE_MASK]; > >+ } > >+} > >+ > >+/* > >+ *----------------------------------------------------------- > ---------------- > >+ * OvsNatInit > >+ * Initialize NAT related resources. > >+ *----------------------------------------------------------- > ---------------- > >+ */ > >+NTSTATUS OvsNatInit(POVS_SWITCH_CONTEXT context) > >+{ > >+ ASSERT(ovsNatTable == NULL); > >+ > >+ /* Init the sync-lock */ > >+ ovsNatLock = NdisAllocateRWLock(context->NdisFilterHandle); > >+ if (ovsNatLock == NULL) { > >+ goto failNoMem; > >+ } > >+ > >+ /* Init the Hash Buffer */ > >+ ovsNatTable = OvsAllocateMemoryWithTag( > >+ sizeof(LIST_ENTRY) * NAT_HASH_TABLE_SIZE, > >+ OVS_CT_POOL_TAG); > >+ if (ovsNatTable == NULL) { > >+ goto freeNatLock; > >+ } > >+ > >+ ovsUnNatTable = OvsAllocateMemoryWithTag( > >+ sizeof(LIST_ENTRY) * NAT_HASH_TABLE_SIZE, > >+ OVS_CT_POOL_TAG); > >+ if (ovsUnNatTable == NULL) { > >+ goto freeNatTable; > >+ } > >+ > >+ for (int i = 0; i < NAT_HASH_TABLE_SIZE; i++) { > >+ InitializeListHead(&ovsNatTable[i]); > >+ InitializeListHead(&ovsUnNatTable[i]); > >+ } > >+ return STATUS_SUCCESS; > >+ > >+freeNatTable: > >+ OvsFreeMemoryWithTag(ovsNatTable, OVS_CT_POOL_TAG); > >+freeNatLock: > >+ NdisFreeRWLock(ovsNatLock); > >+failNoMem: > >+ return STATUS_INSUFFICIENT_RESOURCES; > >+} > >+ > >+/* > >+ *----------------------------------------------------------- > ----------------- > >+ * OvsNatFlush > >+ * Flushes out all NAT entries that match the given zone. > >+ *----------------------------------------------------------- > ----------------- > >+ */ > >+VOID OvsNatFlush(UINT16 zone) > >+{ > >+ LOCK_STATE_EX lockState; > >+ PLIST_ENTRY link, next; > >+ NdisAcquireRWLockWrite(ovsNatLock, &lockState, 0); > >+ for (int i = 0; i < NAT_HASH_TABLE_SIZE; i++) { > >+ LIST_FORALL_SAFE(&ovsNatTable[i], link, next) { > >+ POVS_NAT_ENTRY entry = > >+ CONTAINING_RECORD(link, OVS_NAT_ENTRY, link); > >+ /* zone is a non-zero value */ > >+ if (!zone || zone == entry->key.zone) { > >+ OvsNatDeleteEntry(entry); > >+ } > >+ } > >+ } > >+ NdisReleaseRWLock(ovsNatLock, &lockState); > >+} > >+ > >+/* > >+ *----------------------------------------------------------- > ----------------- > >+ * OvsNatCleanup > >+ * Releases all NAT related resources. > >+ *----------------------------------------------------------- > ----------------- > >+ */ > >+VOID OvsNatCleanup() > >+{ > >+ if (ovsNatTable == NULL) return; > >+ OvsFreeMemoryWithTag(ovsNatTable, OVS_CT_POOL_TAG); > >+ OvsFreeMemoryWithTag(ovsUnNatTable, OVS_CT_POOL_TAG); > >+ NdisFreeRWLock(ovsNatLock); > >+ ovsNatTable = NULL; > >+ ovsUnNatTable = NULL; > >+ ovsNatLock = NULL; > >+} > >+ > >+/* > >+ *----------------------------------------------------------- > ----------------- > >+ * OvsNatPacket > >+ * Performs NAT operation on the packet by replacing the > source/destinaton > >+ * address/port based on natAction. If reverse is TRUE, perform unNAT > >+ * instead. > >+ *----------------------------------------------------------- > ----------------- > >+ */ > >+VOID > >+OvsNatPacket(OvsForwardingContext *ovsFwdCtx, > >+ const OVS_CT_ENTRY *entry, > >+ UINT16 natAction, > >+ OvsFlowKey *key, > >+ BOOLEAN reverse) > >+{ > >+ UINT32 natFlag; > >+ const struct ct_endpoint* endpoint; > >+ /* When it is NAT, only entry->rev_key contains NATTED address; > >+ When it is unNAT, only entry->key contains the UNNATTED address;*/ > >+ const OVS_CT_KEY *ctKey = reverse ? &entry->key : &entry->rev_key; > >+ BOOLEAN isSrcNat; > >+ > >+ if (!(natAction & (NAT_ACTION_SRC | NAT_ACTION_DST))) { > >+ return; > >+ } > >+ isSrcNat = (((natAction & NAT_ACTION_SRC) && !reverse) || > >+ ((natAction & NAT_ACTION_DST) && reverse)); > >+ > >+ if (isSrcNat) { > >+ /* Flag is set to SNAT for SNAT case and the reverse DNAT case */ > >+ natFlag = OVS_CS_F_SRC_NAT; > >+ /* Note that ctKey is the key in the other direction, so > >+ endpoint has to be reverted, i.e. ctKey->dst for SNAT > >+ and ctKey->src for DNAT */ > >+ endpoint = &ctKey->dst; > >+ } else { > >+ natFlag = OVS_CS_F_DST_NAT; > >+ endpoint = &ctKey->src; > >+ } > >+ key->ct.state |= natFlag; > >+ if (ctKey->dl_type == htons(ETH_TYPE_IPV4)) { > >+ OvsUpdateAddressAndPort(ovsFwdCtx, > >+ endpoint->addr.ipv4_aligned, > >+ endpoint->port, isSrcNat); > >+ if (isSrcNat) { > >+ key->ipKey.nwSrc = endpoint->addr.ipv4_aligned; > >+ } else { > >+ key->ipKey.nwDst = endpoint->addr.ipv4_aligned; > >+ } > >+ } else if (ctKey->dl_type == htons(ETH_TYPE_IPV6)){ > >+ // XXX: IPv6 packet not supported yet. > >+ return; > >+ } > >+ if (natAction & (NAT_ACTION_SRC_PORT | NAT_ACTION_DST_PORT)) { > >+ if (isSrcNat) { > >+ if (key->ipKey.l4.tpSrc != 0) { > >+ key->ipKey.l4.tpSrc = endpoint->port; > >+ } > >+ } else { > >+ if (key->ipKey.l4.tpDst != 0) { > >+ key->ipKey.l4.tpDst = endpoint->port; > >+ } > >+ } > >+ } > >+} > >+ > >+ > >+/* > >+ *----------------------------------------------------------- > ----------------- > >+ * OvsNatHashRange > >+ * Compute hash for a range of addresses specified in natInfo. > >+ *----------------------------------------------------------- > ----------------- > >+ */ > >+static UINT32 OvsNatHashRange(const OVS_CT_ENTRY *entry, UINT32 basis) > >+{ > >+ UINT32 hash = basis; > >+#define HASH_ADD(field) \ > >+ hash = OvsJhashBytes(&field, sizeof(field), hash) > >+ > >+ HASH_ADD(entry->natInfo.minAddr); > >+ HASH_ADD(entry->natInfo.maxAddr); > >+ HASH_ADD(entry->key.dl_type); > >+ HASH_ADD(entry->key.nw_proto); > >+ HASH_ADD(entry->key.zone); > >+#undef HASH_ADD > >+ return hash; > >+} > >+ > >+/* > >+ *----------------------------------------------------------- > ----------------- > >+ * OvsNatAddEntry > >+ * Add an entry to the NAT table. Also updates the reverse NAT lookup > >+ * table. > >+ *----------------------------------------------------------- > ----------------- > >+ */ > >+VOID > >+OvsNatAddEntry(OVS_NAT_ENTRY* entry) > >+{ > >+ InsertHeadList(OvsNatGetBucket(&entry->key, FALSE), > >+ &entry->link); > >+ InsertHeadList(OvsNatGetBucket(&entry->value, TRUE), > >+ &entry->reverseLink); > >+} > >+ > >+/* > >+ *----------------------------------------------------------- > ----------------- > >+ * OvsNatCtEntry > >+ * Update an Conntrack entry with NAT information. Translated > address and > >+ * port will be generated and write back to the conntrack entry as a > >+ * result. > >+ *----------------------------------------------------------- > ----------------- > >+ */ > >+BOOLEAN > >+OvsNatCtEntry(OVS_CT_ENTRY *entry) > >+{ > >+ const uint16_t MIN_NAT_EPHEMERAL_PORT = 1024; > >+ const uint16_t MAX_NAT_EPHEMERAL_PORT = 65535; > >+ > >+ uint16_t minPort; > >+ uint16_t maxPort; > >+ uint16_t firstPort; > >+ > >+ uint32_t hash = OvsNatHashRange(entry, 0); > >+ > >+ if ((entry->natInfo.natAction & NAT_ACTION_SRC) && > >+ (!(entry->natInfo.natAction & NAT_ACTION_SRC_PORT))) { > >+ firstPort = minPort = maxPort = ntohs(entry->key.src.port); > >+ } else if ((entry->natInfo.natAction & NAT_ACTION_DST) && > >+ (!(entry->natInfo.natAction & NAT_ACTION_DST_PORT))) { > >+ firstPort = minPort = maxPort = ntohs(entry->key.dst.port); > >+ } else { > >+ uint16_t portDelta = entry->natInfo.maxPort - > entry->natInfo.minPort; > >+ uint16_t portIndex = (uint16_t) hash % (portDelta + 1); > >+ firstPort = entry->natInfo.minPort + portIndex; > >+ minPort = entry->natInfo.minPort; > >+ maxPort = entry->natInfo.maxPort; > >+ } > >+ > >+ uint32_t addrDelta = 0; > >+ uint32_t addrIndex; > >+ struct ct_addr ctAddr, maxCtAddr; > >+ memset(&ctAddr, 0, sizeof ctAddr); > >+ memset(&maxCtAddr, 0, sizeof maxCtAddr); > >+ maxCtAddr = entry->natInfo.maxAddr; > >+ > >+ if (entry->key.dl_type == htons(ETH_TYPE_IPV4)) { > >+ addrDelta = ntohl(entry->natInfo.maxAddr.ipv4_aligned) - > >+ ntohl(entry->natInfo.minAddr.ipv4_aligned); > >+ addrIndex = hash % (addrDelta + 1); > >+ ctAddr.ipv4_aligned = htonl( > >+ ntohl(entry->natInfo.minAddr.ipv4_aligned) + addrIndex); > >+ } else { > >+ // XXX: IPv6 not supported > >+ return FALSE; > >+ } > >+ > >+ uint16_t port = firstPort; > >+ BOOLEAN allPortsTried = FALSE; > >+ BOOLEAN originalPortsTried = FALSE; > >+ struct ct_addr firstAddr = ctAddr; > >+ for (;;) { > >+ if (entry->natInfo.natAction & NAT_ACTION_SRC) { > >+ entry->rev_key.dst.addr = ctAddr; > >+ entry->rev_key.dst.port = htons(port); > >+ } else { > >+ entry->rev_key.src.addr = ctAddr; > >+ entry->rev_key.src.port = htons(port); > >+ } > >+ > >+ OVS_NAT_ENTRY *natEntry = OvsNatLookup(&entry->rev_key, TRUE); > >+ > >+ if (!natEntry) { > >+ natEntry = OvsAllocateMemoryWithTag(sizeof(*natEntry), > >+ OVS_CT_POOL_TAG); > >+ memcpy(&natEntry->key, &entry->key, > >+ sizeof natEntry->key); > >+ memcpy(&natEntry->value, &entry->rev_key, > >+ sizeof natEntry->value); > >+ natEntry->ctEntry = entry; > >+ OvsNatAddEntry(natEntry); > >+ return TRUE; > >+ } else if (!allPortsTried) { > >+ if (minPort == maxPort) { > >+ allPortsTried = TRUE; > >+ } else if (port == maxPort) { > >+ port = minPort; > >+ } else { > >+ port++; > >+ } > >+ if (port == firstPort) { > >+ allPortsTried = TRUE; > >+ } > >+ } else { > >+ if (memcmp(&ctAddr, &maxCtAddr, sizeof ctAddr)) { > >+ if (entry->key.dl_type == htons(ETH_TYPE_IPV4)) { > >+ ctAddr.ipv4_aligned = htonl( > >+ ntohl(ctAddr.ipv4_aligned) + 1); > >+ } else { > >+ // XXX: IPv6 not supported > >+ return FALSE; > >+ } > >+ } else { > >+ ctAddr = entry->natInfo.minAddr; > >+ } > >+ if (!memcmp(&ctAddr, &firstAddr, sizeof ctAddr)) { > >+ if (!originalPortsTried) { > >+ originalPortsTried = TRUE; > >+ ctAddr = entry->natInfo.minAddr; > >+ minPort = MIN_NAT_EPHEMERAL_PORT; > >+ maxPort = MAX_NAT_EPHEMERAL_PORT; > >+ } else { > >+ break; > >+ } > >+ } > >+ firstPort = minPort; > >+ port = firstPort; > >+ allPortsTried = FALSE; > >+ } > >+ } > >+ return FALSE; > >+} > >+ > >+/* > >+ *----------------------------------------------------------- > ----------------- > >+ * OvsNatLookup > >+ * Look up a NAT entry with the given key in the NAT table. > >+ * If reverse is TRUE, look up a NAT entry with the given value > instead. > >+ *----------------------------------------------------------- > ----------------- > >+ */ > >+POVS_NAT_ENTRY > >+OvsNatLookup(const OVS_CT_KEY *ctKey, BOOLEAN reverse) > >+{ > >+ PLIST_ENTRY link; > >+ POVS_NAT_ENTRY entry; > >+ > >+ LIST_FORALL(OvsNatGetBucket(ctKey, reverse), link) { > >+ if (reverse) { > >+ entry = CONTAINING_RECORD(link, OVS_NAT_ENTRY, reverseLink); > >+ > >+ if (OvsNatKeyAreSame(ctKey, &entry->value)) { > >+ return entry; > >+ } > >+ } else { > >+ entry = CONTAINING_RECORD(link, OVS_NAT_ENTRY, link); > >+ > >+ if (OvsNatKeyAreSame(ctKey, &entry->key)) { > >+ return entry; > >+ } > >+ } > >+ } > >+ return NULL; > >+} > >+ > >+/* > >+ *----------------------------------------------------------- > ----------------- > >+ * OvsNatDeleteEntry > >+ * Delete a NAT entry. > >+ *----------------------------------------------------------- > ----------------- > >+ */ > >+VOID > >+OvsNatDeleteEntry(POVS_NAT_ENTRY entry) > >+{ > >+ if (entry == NULL) { > >+ return; > >+ } > >+ RemoveEntryList(&entry->link); > >+ RemoveEntryList(&entry->reverseLink); > >+ OvsFreeMemoryWithTag(entry, OVS_CT_POOL_TAG); > >+} > >+ > >+/* > >+ *----------------------------------------------------------- > ----------------- > >+ * OvsNatDeleteKey > >+ * Delete a NAT entry with the given key. > >+ *----------------------------------------------------------- > ----------------- > >+ */ > >+VOID > >+OvsNatDeleteKey(const OVS_CT_KEY *key) > >+{ > >+ OvsNatDeleteEntry(OvsNatLookup(key, FALSE)); > >+} > >diff --git a/datapath-windows/ovsext/Conntrack-nat.h > b/datapath-windows/ovsext/Conntrack-nat.h > >new file mode 100644 > >index 0000000..99f2b67 > >--- /dev/null > >+++ b/datapath-windows/ovsext/Conntrack-nat.h > >@@ -0,0 +1,39 @@ > >+#ifndef _CONNTRACK_NAT_H > >+#define _CONNTRACK_NAT_H > >+ > >+#include "precomp.h" > >+#include "Flow.h" > >+#include "Debug.h" > >+#include <stddef.h> > >+#include "Conntrack.h" > >+ > >+#define NAT_HASH_TABLE_SIZE ((UINT32)1 << 10) > >+#define NAT_HASH_TABLE_MASK (NAT_HASH_TABLE_SIZE - 1) > >+ > >+typedef struct OVS_NAT_ENTRY { > >+ LIST_ENTRY link; > >+ LIST_ENTRY reverseLink; > >+ OVS_CT_KEY key; > >+ OVS_CT_KEY value; > >+ POVS_CT_ENTRY ctEntry; > >+} OVS_NAT_ENTRY, *POVS_NAT_ENTRY; > >+ > >+__inline static BOOLEAN OvsIsForwardNat(UINT16 natAction) { > >+ return !!(natAction & (NAT_ACTION_SRC | NAT_ACTION_DST)); > >+} > >+ > >+NTSTATUS OvsNatInit(POVS_SWITCH_CONTEXT); > >+VOID OvsNatFlush(UINT16 zone); > >+ > >+VOID OvsNatAddEntry(OVS_NAT_ENTRY* entry); > >+ > >+VOID OvsNatDeleteEntry(POVS_NAT_ENTRY entry); > >+VOID OvsNatDeleteKey(const OVS_CT_KEY *key); > >+VOID OvsNatCleanup(); > >+ > >+POVS_NAT_ENTRY OvsNatLookup(const OVS_CT_KEY *ctKey, BOOLEAN reverse); > >+BOOLEAN OvsNatCtEntry(OVS_CT_ENTRY *ctEntry); > >+VOID OvsNatPacket(OvsForwardingContext *ovsFwdCtx, const OVS_CT_ENTRY > *entry, > >+ UINT16 natAction, OvsFlowKey *key, BOOLEAN reverse); > >+ > >+#endif > >\ No newline at end of file > >-- > >2.10.2.windows.1 > > > _______________________________________________ > dev mailing list > dev@openvswitch.org > https://mail.openvswitch.org/mailman/listinfo/ovs-dev >
diff --git a/datapath-windows/automake.mk b/datapath-windows/automake.mk index 53983ae..296e785 100644 --- a/datapath-windows/automake.mk +++ b/datapath-windows/automake.mk @@ -16,7 +16,9 @@ EXTRA_DIST += \ datapath-windows/ovsext/Conntrack-icmp.c \ datapath-windows/ovsext/Conntrack-other.c \ datapath-windows/ovsext/Conntrack-related.c \ + datapath-windows/ovsext/Conntrack-nat.c \ datapath-windows/ovsext/Conntrack-tcp.c \ + datapath-windows/ovsext/Conntrack-nat.h \ datapath-windows/ovsext/Conntrack.c \ datapath-windows/ovsext/Conntrack.h \ datapath-windows/ovsext/Datapath.c \ diff --git a/datapath-windows/ovsext/Conntrack-nat.c b/datapath-windows/ovsext/Conntrack-nat.c new file mode 100644 index 0000000..4930694 --- /dev/null +++ b/datapath-windows/ovsext/Conntrack-nat.c @@ -0,0 +1,437 @@ +#include "Conntrack-nat.h" +#include "Jhash.h" + +PLIST_ENTRY ovsNatTable = NULL; +PLIST_ENTRY ovsUnNatTable = NULL; +static PNDIS_RW_LOCK_EX ovsNatLock; + +/* + *--------------------------------------------------------------------------- + * OvsHashNatKey + * Hash NAT related fields in a Conntrack key. + *--------------------------------------------------------------------------- + */ +static __inline UINT32 +OvsHashNatKey(const OVS_CT_KEY *key) +{ + UINT32 hash = 0; +#define HASH_ADD(field) \ + hash = OvsJhashBytes(&key->field, sizeof(key->field), hash) + + HASH_ADD(src.addr.ipv4_aligned); + HASH_ADD(dst.addr.ipv4_aligned); + HASH_ADD(src.port); + HASH_ADD(dst.port); + HASH_ADD(zone); +#undef HASH_ADD + return hash; +} + +/* + *--------------------------------------------------------------------------- + * OvsNatKeyAreSame + * Compare NAT related fields in a Conntrack key. + *--------------------------------------------------------------------------- + */ +static __inline BOOLEAN +OvsNatKeyAreSame(const OVS_CT_KEY *key1, const OVS_CT_KEY *key2) +{ + // XXX: Compare IPv6 key as well +#define FIELD_COMPARE(field) \ + if (key1->field != key2->field) return FALSE + + FIELD_COMPARE(src.addr.ipv4_aligned); + FIELD_COMPARE(dst.addr.ipv4_aligned); + FIELD_COMPARE(src.port); + FIELD_COMPARE(dst.port); + FIELD_COMPARE(zone); + return TRUE; +#undef FIELD_COMPARE +} + +/* + *--------------------------------------------------------------------------- + * OvsNaGetBucket + * Returns the row of NAT table that has the same hash as the given NAT + * hash key. If isReverse is TRUE, returns the row of reverse NAT table + * instead. + *--------------------------------------------------------------------------- + */ +static __inline PLIST_ENTRY +OvsNatGetBucket(const OVS_CT_KEY *key, BOOLEAN isReverse) +{ + uint32_t hash = OvsHashNatKey(key); + if (isReverse) { + return &ovsUnNatTable[hash & NAT_HASH_TABLE_MASK]; + } else { + return &ovsNatTable[hash & NAT_HASH_TABLE_MASK]; + } +} + +/* + *--------------------------------------------------------------------------- + * OvsNatInit + * Initialize NAT related resources. + *--------------------------------------------------------------------------- + */ +NTSTATUS OvsNatInit(POVS_SWITCH_CONTEXT context) +{ + ASSERT(ovsNatTable == NULL); + + /* Init the sync-lock */ + ovsNatLock = NdisAllocateRWLock(context->NdisFilterHandle); + if (ovsNatLock == NULL) { + goto failNoMem; + } + + /* Init the Hash Buffer */ + ovsNatTable = OvsAllocateMemoryWithTag( + sizeof(LIST_ENTRY) * NAT_HASH_TABLE_SIZE, + OVS_CT_POOL_TAG); + if (ovsNatTable == NULL) { + goto freeNatLock; + } + + ovsUnNatTable = OvsAllocateMemoryWithTag( + sizeof(LIST_ENTRY) * NAT_HASH_TABLE_SIZE, + OVS_CT_POOL_TAG); + if (ovsUnNatTable == NULL) { + goto freeNatTable; + } + + for (int i = 0; i < NAT_HASH_TABLE_SIZE; i++) { + InitializeListHead(&ovsNatTable[i]); + InitializeListHead(&ovsUnNatTable[i]); + } + return STATUS_SUCCESS; + +freeNatTable: + OvsFreeMemoryWithTag(ovsNatTable, OVS_CT_POOL_TAG); +freeNatLock: + NdisFreeRWLock(ovsNatLock); +failNoMem: + return STATUS_INSUFFICIENT_RESOURCES; +} + +/* + *---------------------------------------------------------------------------- + * OvsNatFlush + * Flushes out all NAT entries that match the given zone. + *---------------------------------------------------------------------------- + */ +VOID OvsNatFlush(UINT16 zone) +{ + LOCK_STATE_EX lockState; + PLIST_ENTRY link, next; + NdisAcquireRWLockWrite(ovsNatLock, &lockState, 0); + for (int i = 0; i < NAT_HASH_TABLE_SIZE; i++) { + LIST_FORALL_SAFE(&ovsNatTable[i], link, next) { + POVS_NAT_ENTRY entry = + CONTAINING_RECORD(link, OVS_NAT_ENTRY, link); + /* zone is a non-zero value */ + if (!zone || zone == entry->key.zone) { + OvsNatDeleteEntry(entry); + } + } + } + NdisReleaseRWLock(ovsNatLock, &lockState); +} + +/* + *---------------------------------------------------------------------------- + * OvsNatCleanup + * Releases all NAT related resources. + *---------------------------------------------------------------------------- + */ +VOID OvsNatCleanup() +{ + if (ovsNatTable == NULL) return; + OvsFreeMemoryWithTag(ovsNatTable, OVS_CT_POOL_TAG); + OvsFreeMemoryWithTag(ovsUnNatTable, OVS_CT_POOL_TAG); + NdisFreeRWLock(ovsNatLock); + ovsNatTable = NULL; + ovsUnNatTable = NULL; + ovsNatLock = NULL; +} + +/* + *---------------------------------------------------------------------------- + * OvsNatPacket + * Performs NAT operation on the packet by replacing the source/destinaton + * address/port based on natAction. If reverse is TRUE, perform unNAT + * instead. + *---------------------------------------------------------------------------- + */ +VOID +OvsNatPacket(OvsForwardingContext *ovsFwdCtx, + const OVS_CT_ENTRY *entry, + UINT16 natAction, + OvsFlowKey *key, + BOOLEAN reverse) +{ + UINT32 natFlag; + const struct ct_endpoint* endpoint; + /* When it is NAT, only entry->rev_key contains NATTED address; + When it is unNAT, only entry->key contains the UNNATTED address;*/ + const OVS_CT_KEY *ctKey = reverse ? &entry->key : &entry->rev_key; + BOOLEAN isSrcNat; + + if (!(natAction & (NAT_ACTION_SRC | NAT_ACTION_DST))) { + return; + } + isSrcNat = (((natAction & NAT_ACTION_SRC) && !reverse) || + ((natAction & NAT_ACTION_DST) && reverse)); + + if (isSrcNat) { + /* Flag is set to SNAT for SNAT case and the reverse DNAT case */ + natFlag = OVS_CS_F_SRC_NAT; + /* Note that ctKey is the key in the other direction, so + endpoint has to be reverted, i.e. ctKey->dst for SNAT + and ctKey->src for DNAT */ + endpoint = &ctKey->dst; + } else { + natFlag = OVS_CS_F_DST_NAT; + endpoint = &ctKey->src; + } + key->ct.state |= natFlag; + if (ctKey->dl_type == htons(ETH_TYPE_IPV4)) { + OvsUpdateAddressAndPort(ovsFwdCtx, + endpoint->addr.ipv4_aligned, + endpoint->port, isSrcNat); + if (isSrcNat) { + key->ipKey.nwSrc = endpoint->addr.ipv4_aligned; + } else { + key->ipKey.nwDst = endpoint->addr.ipv4_aligned; + } + } else if (ctKey->dl_type == htons(ETH_TYPE_IPV6)){ + // XXX: IPv6 packet not supported yet. + return; + } + if (natAction & (NAT_ACTION_SRC_PORT | NAT_ACTION_DST_PORT)) { + if (isSrcNat) { + if (key->ipKey.l4.tpSrc != 0) { + key->ipKey.l4.tpSrc = endpoint->port; + } + } else { + if (key->ipKey.l4.tpDst != 0) { + key->ipKey.l4.tpDst = endpoint->port; + } + } + } +} + + +/* + *---------------------------------------------------------------------------- + * OvsNatHashRange + * Compute hash for a range of addresses specified in natInfo. + *---------------------------------------------------------------------------- + */ +static UINT32 OvsNatHashRange(const OVS_CT_ENTRY *entry, UINT32 basis) +{ + UINT32 hash = basis; +#define HASH_ADD(field) \ + hash = OvsJhashBytes(&field, sizeof(field), hash) + + HASH_ADD(entry->natInfo.minAddr); + HASH_ADD(entry->natInfo.maxAddr); + HASH_ADD(entry->key.dl_type); + HASH_ADD(entry->key.nw_proto); + HASH_ADD(entry->key.zone); +#undef HASH_ADD + return hash; +} + +/* + *---------------------------------------------------------------------------- + * OvsNatAddEntry + * Add an entry to the NAT table. Also updates the reverse NAT lookup + * table. + *---------------------------------------------------------------------------- + */ +VOID +OvsNatAddEntry(OVS_NAT_ENTRY* entry) +{ + InsertHeadList(OvsNatGetBucket(&entry->key, FALSE), + &entry->link); + InsertHeadList(OvsNatGetBucket(&entry->value, TRUE), + &entry->reverseLink); +} + +/* + *---------------------------------------------------------------------------- + * OvsNatCtEntry + * Update an Conntrack entry with NAT information. Translated address and + * port will be generated and write back to the conntrack entry as a + * result. + *---------------------------------------------------------------------------- + */ +BOOLEAN +OvsNatCtEntry(OVS_CT_ENTRY *entry) +{ + const uint16_t MIN_NAT_EPHEMERAL_PORT = 1024; + const uint16_t MAX_NAT_EPHEMERAL_PORT = 65535; + + uint16_t minPort; + uint16_t maxPort; + uint16_t firstPort; + + uint32_t hash = OvsNatHashRange(entry, 0); + + if ((entry->natInfo.natAction & NAT_ACTION_SRC) && + (!(entry->natInfo.natAction & NAT_ACTION_SRC_PORT))) { + firstPort = minPort = maxPort = ntohs(entry->key.src.port); + } else if ((entry->natInfo.natAction & NAT_ACTION_DST) && + (!(entry->natInfo.natAction & NAT_ACTION_DST_PORT))) { + firstPort = minPort = maxPort = ntohs(entry->key.dst.port); + } else { + uint16_t portDelta = entry->natInfo.maxPort - entry->natInfo.minPort; + uint16_t portIndex = (uint16_t) hash % (portDelta + 1); + firstPort = entry->natInfo.minPort + portIndex; + minPort = entry->natInfo.minPort; + maxPort = entry->natInfo.maxPort; + } + + uint32_t addrDelta = 0; + uint32_t addrIndex; + struct ct_addr ctAddr, maxCtAddr; + memset(&ctAddr, 0, sizeof ctAddr); + memset(&maxCtAddr, 0, sizeof maxCtAddr); + maxCtAddr = entry->natInfo.maxAddr; + + if (entry->key.dl_type == htons(ETH_TYPE_IPV4)) { + addrDelta = ntohl(entry->natInfo.maxAddr.ipv4_aligned) - + ntohl(entry->natInfo.minAddr.ipv4_aligned); + addrIndex = hash % (addrDelta + 1); + ctAddr.ipv4_aligned = htonl( + ntohl(entry->natInfo.minAddr.ipv4_aligned) + addrIndex); + } else { + // XXX: IPv6 not supported + return FALSE; + } + + uint16_t port = firstPort; + BOOLEAN allPortsTried = FALSE; + BOOLEAN originalPortsTried = FALSE; + struct ct_addr firstAddr = ctAddr; + for (;;) { + if (entry->natInfo.natAction & NAT_ACTION_SRC) { + entry->rev_key.dst.addr = ctAddr; + entry->rev_key.dst.port = htons(port); + } else { + entry->rev_key.src.addr = ctAddr; + entry->rev_key.src.port = htons(port); + } + + OVS_NAT_ENTRY *natEntry = OvsNatLookup(&entry->rev_key, TRUE); + + if (!natEntry) { + natEntry = OvsAllocateMemoryWithTag(sizeof(*natEntry), + OVS_CT_POOL_TAG); + memcpy(&natEntry->key, &entry->key, + sizeof natEntry->key); + memcpy(&natEntry->value, &entry->rev_key, + sizeof natEntry->value); + natEntry->ctEntry = entry; + OvsNatAddEntry(natEntry); + return TRUE; + } else if (!allPortsTried) { + if (minPort == maxPort) { + allPortsTried = TRUE; + } else if (port == maxPort) { + port = minPort; + } else { + port++; + } + if (port == firstPort) { + allPortsTried = TRUE; + } + } else { + if (memcmp(&ctAddr, &maxCtAddr, sizeof ctAddr)) { + if (entry->key.dl_type == htons(ETH_TYPE_IPV4)) { + ctAddr.ipv4_aligned = htonl( + ntohl(ctAddr.ipv4_aligned) + 1); + } else { + // XXX: IPv6 not supported + return FALSE; + } + } else { + ctAddr = entry->natInfo.minAddr; + } + if (!memcmp(&ctAddr, &firstAddr, sizeof ctAddr)) { + if (!originalPortsTried) { + originalPortsTried = TRUE; + ctAddr = entry->natInfo.minAddr; + minPort = MIN_NAT_EPHEMERAL_PORT; + maxPort = MAX_NAT_EPHEMERAL_PORT; + } else { + break; + } + } + firstPort = minPort; + port = firstPort; + allPortsTried = FALSE; + } + } + return FALSE; +} + +/* + *---------------------------------------------------------------------------- + * OvsNatLookup + * Look up a NAT entry with the given key in the NAT table. + * If reverse is TRUE, look up a NAT entry with the given value instead. + *---------------------------------------------------------------------------- + */ +POVS_NAT_ENTRY +OvsNatLookup(const OVS_CT_KEY *ctKey, BOOLEAN reverse) +{ + PLIST_ENTRY link; + POVS_NAT_ENTRY entry; + + LIST_FORALL(OvsNatGetBucket(ctKey, reverse), link) { + if (reverse) { + entry = CONTAINING_RECORD(link, OVS_NAT_ENTRY, reverseLink); + + if (OvsNatKeyAreSame(ctKey, &entry->value)) { + return entry; + } + } else { + entry = CONTAINING_RECORD(link, OVS_NAT_ENTRY, link); + + if (OvsNatKeyAreSame(ctKey, &entry->key)) { + return entry; + } + } + } + return NULL; +} + +/* + *---------------------------------------------------------------------------- + * OvsNatDeleteEntry + * Delete a NAT entry. + *---------------------------------------------------------------------------- + */ +VOID +OvsNatDeleteEntry(POVS_NAT_ENTRY entry) +{ + if (entry == NULL) { + return; + } + RemoveEntryList(&entry->link); + RemoveEntryList(&entry->reverseLink); + OvsFreeMemoryWithTag(entry, OVS_CT_POOL_TAG); +} + +/* + *---------------------------------------------------------------------------- + * OvsNatDeleteKey + * Delete a NAT entry with the given key. + *---------------------------------------------------------------------------- + */ +VOID +OvsNatDeleteKey(const OVS_CT_KEY *key) +{ + OvsNatDeleteEntry(OvsNatLookup(key, FALSE)); +} diff --git a/datapath-windows/ovsext/Conntrack-nat.h b/datapath-windows/ovsext/Conntrack-nat.h new file mode 100644 index 0000000..99f2b67 --- /dev/null +++ b/datapath-windows/ovsext/Conntrack-nat.h @@ -0,0 +1,39 @@ +#ifndef _CONNTRACK_NAT_H +#define _CONNTRACK_NAT_H + +#include "precomp.h" +#include "Flow.h" +#include "Debug.h" +#include <stddef.h> +#include "Conntrack.h" + +#define NAT_HASH_TABLE_SIZE ((UINT32)1 << 10) +#define NAT_HASH_TABLE_MASK (NAT_HASH_TABLE_SIZE - 1) + +typedef struct OVS_NAT_ENTRY { + LIST_ENTRY link; + LIST_ENTRY reverseLink; + OVS_CT_KEY key; + OVS_CT_KEY value; + POVS_CT_ENTRY ctEntry; +} OVS_NAT_ENTRY, *POVS_NAT_ENTRY; + +__inline static BOOLEAN OvsIsForwardNat(UINT16 natAction) { + return !!(natAction & (NAT_ACTION_SRC | NAT_ACTION_DST)); +} + +NTSTATUS OvsNatInit(POVS_SWITCH_CONTEXT); +VOID OvsNatFlush(UINT16 zone); + +VOID OvsNatAddEntry(OVS_NAT_ENTRY* entry); + +VOID OvsNatDeleteEntry(POVS_NAT_ENTRY entry); +VOID OvsNatDeleteKey(const OVS_CT_KEY *key); +VOID OvsNatCleanup(); + +POVS_NAT_ENTRY OvsNatLookup(const OVS_CT_KEY *ctKey, BOOLEAN reverse); +BOOLEAN OvsNatCtEntry(OVS_CT_ENTRY *ctEntry); +VOID OvsNatPacket(OvsForwardingContext *ovsFwdCtx, const OVS_CT_ENTRY *entry, + UINT16 natAction, OvsFlowKey *key, BOOLEAN reverse); + +#endif \ No newline at end of file
Signed-off-by: Yin Lin <linyi@vmware.com> Issue: # Change-Id: I6f37360c36525548b343f0016304015fec8aba7d --- datapath-windows/automake.mk | 2 + datapath-windows/ovsext/Conntrack-nat.c | 437 ++++++++++++++++++++++++++++++++ datapath-windows/ovsext/Conntrack-nat.h | 39 +++ 3 files changed, 478 insertions(+) create mode 100644 datapath-windows/ovsext/Conntrack-nat.c create mode 100644 datapath-windows/ovsext/Conntrack-nat.h