From patchwork Tue May 9 22:59:49 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Yin Lin X-Patchwork-Id: 760362 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from mail.linuxfoundation.org (mail.linuxfoundation.org [140.211.169.12]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 3wMvwg4lVsz9s06 for ; Wed, 10 May 2017 09:00:35 +1000 (AEST) Received: from mail.linux-foundation.org (localhost [127.0.0.1]) by mail.linuxfoundation.org (Postfix) with ESMTP id F2E33B55; Tue, 9 May 2017 22:59:57 +0000 (UTC) X-Original-To: dev@openvswitch.org Delivered-To: ovs-dev@mail.linuxfoundation.org Received: from smtp1.linuxfoundation.org (smtp1.linux-foundation.org [172.17.192.35]) by mail.linuxfoundation.org (Postfix) with ESMTPS id A241549B for ; Tue, 9 May 2017 22:59:55 +0000 (UTC) X-Greylist: domain auto-whitelisted by SQLgrey-1.7.6 Received: from EX13-EDG-OU-001.vmware.com (ex13-edg-ou-001.vmware.com [208.91.0.189]) by smtp1.linuxfoundation.org (Postfix) with ESMTPS id ACB38168 for ; Tue, 9 May 2017 22:59:53 +0000 (UTC) Received: from sc9-mailhost1.vmware.com (10.113.161.71) by EX13-EDG-OU-001.vmware.com (10.113.208.155) with Microsoft SMTP Server id 15.0.1156.6; Tue, 9 May 2017 15:58:46 -0700 Received: from linyi-win10.vmware.com (htb-1s-eng-dhcp177.eng.vmware.com [10.33.78.177]) by sc9-mailhost1.vmware.com (Postfix) with ESMTP id 8C53F18408; Tue, 9 May 2017 15:59:52 -0700 (PDT) From: Yin Lin To: Date: Tue, 9 May 2017 15:59:49 -0700 Message-ID: <20170509225950.9404-3-linyi@vmware.com> X-Mailer: git-send-email 2.10.2.windows.1 In-Reply-To: <20170509225950.9404-1-linyi@vmware.com> References: <20170509225950.9404-1-linyi@vmware.com> MIME-Version: 1.0 Received-SPF: None (EX13-EDG-OU-001.vmware.com: linyi@vmware.com does not designate permitted sender hosts) X-Spam-Status: No, score=-1.9 required=5.0 tests=BAYES_00, RCVD_IN_DNSWL_NONE, RP_MATCHES_RCVD autolearn=ham version=3.3.1 X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on smtp1.linux-foundation.org Cc: Yin Lin Subject: [ovs-dev] [PATCH v7 3/4] datapath-windows: NAT integration with conntrack X-BeenThere: ovs-dev@openvswitch.org X-Mailman-Version: 2.1.12 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Sender: ovs-dev-bounces@openvswitch.org Errors-To: ovs-dev-bounces@openvswitch.org This patch integrates NAT module with existing conntrack module. NAT action is now supported. Signed-off-by: Yin Lin --- datapath-windows/automake.mk | 4 +- datapath-windows/ovsext/Actions.c | 119 ++++++++++++++++++++- datapath-windows/ovsext/Actions.h | 20 ++++ datapath-windows/ovsext/Conntrack.c | 187 +++++++++++++++++++++------------ datapath-windows/ovsext/Conntrack.h | 25 +++-- datapath-windows/ovsext/ovsext.vcxproj | 2 + 6 files changed, 274 insertions(+), 83 deletions(-) diff --git a/datapath-windows/automake.mk b/datapath-windows/automake.mk index 7177630..f1632cc 100644 --- a/datapath-windows/automake.mk +++ b/datapath-windows/automake.mk @@ -16,9 +16,9 @@ EXTRA_DIST += \ datapath-windows/ovsext/Conntrack-icmp.c \ datapath-windows/ovsext/Conntrack-other.c \ datapath-windows/ovsext/Conntrack-related.c \ - datapath-windows/ovsext/Conntrack-nat.c \ + datapath-windows/ovsext/Conntrack-nat.c \ datapath-windows/ovsext/Conntrack-tcp.c \ - datapath-windows/ovsext/Conntrack-nat.h \ + datapath-windows/ovsext/Conntrack-nat.h \ datapath-windows/ovsext/Conntrack.c \ datapath-windows/ovsext/Conntrack.h \ datapath-windows/ovsext/Datapath.c \ diff --git a/datapath-windows/ovsext/Actions.c b/datapath-windows/ovsext/Actions.c index e2eae9a..a808d2c 100644 --- a/datapath-windows/ovsext/Actions.c +++ b/datapath-windows/ovsext/Actions.c @@ -1380,7 +1380,7 @@ PUINT8 OvsGetHeaderBySize(OvsForwardingContext *ovsFwdCtx, * based on the specified key. *---------------------------------------------------------------------------- */ -static __inline NDIS_STATUS +NDIS_STATUS OvsUpdateUdpPorts(OvsForwardingContext *ovsFwdCtx, const struct ovs_key_udp *udpAttr) { @@ -1427,7 +1427,7 @@ OvsUpdateUdpPorts(OvsForwardingContext *ovsFwdCtx, * based on the specified key. *---------------------------------------------------------------------------- */ -static __inline NDIS_STATUS +NDIS_STATUS OvsUpdateTcpPorts(OvsForwardingContext *ovsFwdCtx, const struct ovs_key_tcp *tcpAttr) { @@ -1465,12 +1465,125 @@ OvsUpdateTcpPorts(OvsForwardingContext *ovsFwdCtx, /* *---------------------------------------------------------------------------- + * OvsUpdateAddressAndPort -- + * Updates the source/destination IP and port fields in + * ovsFwdCtx.curNbl inline based on the specified key. + *---------------------------------------------------------------------------- + */ +NDIS_STATUS +OvsUpdateAddressAndPort(OvsForwardingContext *ovsFwdCtx, + UINT32 newAddr, UINT16 newPort, + BOOLEAN isSource, BOOLEAN isTx) +{ + PUINT8 bufferStart; + UINT32 hdrSize; + OVS_PACKET_HDR_INFO *layers = &ovsFwdCtx->layers; + IPHdr *ipHdr; + TCPHdr *tcpHdr = NULL; + UDPHdr *udpHdr = NULL; + UINT32 *addrField = NULL; + UINT16 *portField = NULL; + UINT16 *checkField = NULL; + BOOLEAN l4Offload = FALSE; + NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo; + + ASSERT(layers->value != 0); + + if (layers->isTcp || layers->isUdp) { + hdrSize = layers->l4Offset + + layers->isTcp ? sizeof (*tcpHdr) : sizeof (*udpHdr); + } else { + hdrSize = layers->l3Offset + sizeof (*ipHdr); + } + + bufferStart = OvsGetHeaderBySize(ovsFwdCtx, hdrSize); + if (!bufferStart) { + return NDIS_STATUS_RESOURCES; + } + + ipHdr = (IPHdr *)(bufferStart + layers->l3Offset); + + if (layers->isTcp) { + tcpHdr = (TCPHdr *)(bufferStart + layers->l4Offset); + } else if (layers->isUdp) { + udpHdr = (UDPHdr *)(bufferStart + layers->l4Offset); + } + + csumInfo.Value = NET_BUFFER_LIST_INFO(ovsFwdCtx->curNbl, + TcpIpChecksumNetBufferListInfo); + /* + * Adjust the IP header inline as dictated by the action, and also update + * the IP and the TCP checksum for the data modified. + * + * In the future, this could be optimized to make one call to + * ChecksumUpdate32(). Ignoring this for now, since for the most common + * case, we only update the TTL. + */ + + if (isSource) { + addrField = &ipHdr->saddr; + if (tcpHdr) { + portField = &tcpHdr->source; + checkField = &tcpHdr->check; + l4Offload = isTx ? (BOOLEAN)csumInfo.Transmit.TcpChecksum : + ((BOOLEAN)csumInfo.Receive.TcpChecksumSucceeded || + (BOOLEAN)csumInfo.Receive.TcpChecksumFailed); + } else if (udpHdr) { + portField = &udpHdr->source; + checkField = &udpHdr->check; + l4Offload = isTx ? (BOOLEAN)csumInfo.Transmit.UdpChecksum : + ((BOOLEAN)csumInfo.Receive.UdpChecksumSucceeded || + (BOOLEAN)csumInfo.Receive.UdpChecksumFailed); + } + } else { + addrField = &ipHdr->daddr; + if (tcpHdr) { + portField = &tcpHdr->dest; + checkField = &tcpHdr->check; + } else if (udpHdr) { + portField = &udpHdr->dest; + checkField = &udpHdr->check; + } + } + if (*addrField != newAddr) { + UINT32 oldAddr = *addrField; + if (checkField && *checkField != 0) { + if (l4Offload) { + /* Recompute IP pseudo checksum */ + *checkField = ~(*checkField); + *checkField = ChecksumUpdate32(*checkField, oldAddr, + newAddr); + *checkField = ~(*checkField); + } else { + *checkField = ChecksumUpdate32(*checkField, oldAddr, + newAddr); + } + } + if (ipHdr->check != 0) { + ipHdr->check = ChecksumUpdate32(ipHdr->check, oldAddr, + newAddr); + } + *addrField = newAddr; + } + + if (portField && *portField != newPort) { + if (checkField && !l4Offload) { + *checkField = ChecksumUpdate16(*checkField, *portField, + newPort); + } + *portField = newPort; + } + return NDIS_STATUS_SUCCESS; +} + +/* + *---------------------------------------------------------------------------- * OvsUpdateIPv4Header -- * Updates the IPv4 header in ovsFwdCtx.curNbl inline based on the * specified key. *---------------------------------------------------------------------------- */ -static __inline NDIS_STATUS +NDIS_STATUS OvsUpdateIPv4Header(OvsForwardingContext *ovsFwdCtx, const struct ovs_key_ipv4 *ipAttr) { diff --git a/datapath-windows/ovsext/Actions.h b/datapath-windows/ovsext/Actions.h index 1ce6c20..fd050d5 100644 --- a/datapath-windows/ovsext/Actions.h +++ b/datapath-windows/ovsext/Actions.h @@ -110,4 +110,24 @@ OvsDoRecirc(POVS_SWITCH_CONTEXT switchContext, UINT32 srcPortNo, OVS_PACKET_HDR_INFO *layers); +PUINT8 OvsGetHeaderBySize(OvsForwardingContext *ovsFwdCtx, + UINT32 size); + +NDIS_STATUS +OvsUpdateUdpPorts(OvsForwardingContext *ovsFwdCtx, + const struct ovs_key_udp *udpAttr); + +NDIS_STATUS +OvsUpdateTcpPorts(OvsForwardingContext *ovsFwdCtx, + const struct ovs_key_tcp *tcpAttr); + +NDIS_STATUS +OvsUpdateIPv4Header(OvsForwardingContext *ovsFwdCtx, + const struct ovs_key_ipv4 *ipAttr); + +NDIS_STATUS +OvsUpdateAddressAndPort(OvsForwardingContext *ovsFwdCtx, + UINT32 newAddr, UINT16 newPort, + BOOLEAN isSource, BOOLEAN isTx); + #endif /* __ACTIONS_H_ */ diff --git a/datapath-windows/ovsext/Conntrack.c b/datapath-windows/ovsext/Conntrack.c index 9824368..6e95f47 100644 --- a/datapath-windows/ovsext/Conntrack.c +++ b/datapath-windows/ovsext/Conntrack.c @@ -19,6 +19,7 @@ #include "Jhash.h" #include "PacketParser.h" #include "Event.h" +#include "Conntrack-nat.h" #pragma warning(push) #pragma warning(disable:4311) @@ -27,7 +28,7 @@ #define SEC_TO_UNIX_EPOCH 11644473600LL #define SEC_TO_NANOSEC 1000000000LL -KSTART_ROUTINE ovsConntrackEntryCleaner; +KSTART_ROUTINE OvsConntrackEntryCleaner; static PLIST_ENTRY ovsConntrackTable; static OVS_CT_THREAD_CTX ctThreadCtx; static PNDIS_RW_LOCK_EX ovsConntrackLockObj; @@ -72,7 +73,7 @@ OvsInitConntrack(POVS_SWITCH_CONTEXT context) /* Init CT Cleaner Thread */ KeInitializeEvent(&ctThreadCtx.event, NotificationEvent, FALSE); status = PsCreateSystemThread(&threadHandle, SYNCHRONIZE, NULL, NULL, - NULL, ovsConntrackEntryCleaner, + NULL, OvsConntrackEntryCleaner, &ctThreadCtx); if (status != STATUS_SUCCESS) { @@ -89,6 +90,13 @@ OvsInitConntrack(POVS_SWITCH_CONTEXT context) &ctThreadCtx.threadObject, NULL); ZwClose(threadHandle); threadHandle = NULL; + + status = OvsNatInit(); + + if (status != STATUS_SUCCESS) { + OvsCleanupConntrack(); + return status; + } return STATUS_SUCCESS; } @@ -121,6 +129,7 @@ OvsCleanupConntrack(VOID) NdisFreeRWLock(ovsConntrackLockObj); ovsConntrackLockObj = NULL; + OvsNatCleanup(); } static __inline VOID @@ -160,25 +169,42 @@ OvsPostCtEventEntry(POVS_CT_ENTRY entry, UINT8 type) OvsPostCtEvent(&ctEventEntry); } -static __inline VOID -OvsCtAddEntry(POVS_CT_ENTRY entry, OvsConntrackKeyLookupCtx *ctx, UINT64 now) +static __inline BOOLEAN +OvsCtAddEntry(POVS_CT_ENTRY entry, OvsConntrackKeyLookupCtx *ctx, + PNAT_ACTION_INFO natInfo, UINT64 now) { - NdisMoveMemory(&entry->key, &ctx->key, sizeof (OVS_CT_KEY)); - NdisMoveMemory(&entry->rev_key, &ctx->key, sizeof (OVS_CT_KEY)); + NdisMoveMemory(&entry->key, &ctx->key, sizeof(OVS_CT_KEY)); + NdisMoveMemory(&entry->rev_key, &ctx->key, sizeof(OVS_CT_KEY)); OvsCtKeyReverse(&entry->rev_key); + + /* NatInfo is always initialized to be disabled, so that if NAT action + * fails, we will not end up deleting an non-existent NAT entry. + */ + if (natInfo != NULL && OvsIsForwardNat(natInfo->natAction)) { + entry->natInfo = *natInfo; + if (!OvsNatCtEntry(entry)) { + return FALSE; + } + ctx->hash = OvsHashCtKey(&entry->key); + } else { + entry->natInfo.natAction = natInfo->natAction; + } + entry->timestampStart = now; InsertHeadList(&ovsConntrackTable[ctx->hash & CT_HASH_TABLE_MASK], &entry->link); ctTotalEntries++; + return TRUE; } static __inline POVS_CT_ENTRY -OvsCtEntryCreate(PNET_BUFFER_LIST curNbl, +OvsCtEntryCreate(OvsForwardingContext *fwdCtx, UINT8 ipProto, UINT32 l4Offset, OvsConntrackKeyLookupCtx *ctx, OvsFlowKey *key, + PNAT_ACTION_INFO natInfo, BOOLEAN commit, UINT64 currentTime, BOOLEAN *entryCreated) @@ -186,6 +212,7 @@ OvsCtEntryCreate(PNET_BUFFER_LIST curNbl, POVS_CT_ENTRY entry = NULL; *entryCreated = FALSE; UINT32 state = 0; + PNET_BUFFER_LIST curNbl = fwdCtx->curNbl; switch (ipProto) { case IPPROTO_TCP: @@ -213,12 +240,9 @@ OvsCtEntryCreate(PNET_BUFFER_LIST curNbl, if (parentEntry != NULL) { entry->parent = parentEntry; } - OvsCtAddEntry(entry, ctx, currentTime); *entryCreated = TRUE; } - - OvsCtUpdateFlowKey(key, state, ctx->key.zone, 0, NULL); - return entry; + break; } case IPPROTO_ICMP: { @@ -232,35 +256,31 @@ OvsCtEntryCreate(PNET_BUFFER_LIST curNbl, state |= OVS_CS_F_NEW; if (commit) { entry = OvsConntrackCreateIcmpEntry(currentTime); - if (!entry) { - return NULL; - } - OvsCtAddEntry(entry, ctx, currentTime); *entryCreated = TRUE; } - - OvsCtUpdateFlowKey(key, state, ctx->key.zone, 0, NULL); - return entry; + break; } case IPPROTO_UDP: { state |= OVS_CS_F_NEW; if (commit) { entry = OvsConntrackCreateOtherEntry(currentTime); - if (!entry) { - return NULL; - } - OvsCtAddEntry(entry, ctx, currentTime); *entryCreated = TRUE; } - - OvsCtUpdateFlowKey(key, state, ctx->key.zone, 0, NULL); - return entry; + break; } default: goto invalid; } + if (commit && !entry) { + return NULL; + } + if (entry && !OvsCtAddEntry(entry, ctx, natInfo, currentTime)) { + return NULL; + } + OvsCtUpdateFlowKey(key, state, ctx->key.zone, 0, NULL); + return entry; invalid: state |= OVS_CS_F_INVALID; OvsCtUpdateFlowKey(key, state, ctx->key.zone, 0, NULL); @@ -269,11 +289,11 @@ invalid: static enum CT_UPDATE_RES OvsCtUpdateEntry(OVS_CT_ENTRY* entry, - PNET_BUFFER_LIST nbl, - UINT8 ipProto, - UINT32 l4Offset, - BOOLEAN reply, - UINT64 now) + PNET_BUFFER_LIST nbl, + UINT8 ipProto, + UINT32 l4Offset, + BOOLEAN reply, + UINT64 now) { switch (ipProto) { @@ -299,6 +319,12 @@ OvsCtUpdateEntry(OVS_CT_ENTRY* entry, static __inline VOID OvsCtEntryDelete(POVS_CT_ENTRY entry) { + if (entry == NULL) { + return; + } + if (entry->natInfo.natAction) { + OvsNatDeleteKey(&entry->key); + } OvsPostCtEventEntry(entry, OVS_EVENT_CT_DELETE); RemoveEntryList(&entry->link); OvsFreeMemoryWithTag(entry, OVS_CT_POOL_TAG); @@ -308,10 +334,6 @@ OvsCtEntryDelete(POVS_CT_ENTRY entry) static __inline BOOLEAN OvsCtEntryExpired(POVS_CT_ENTRY entry) { - if (entry == NULL) { - return TRUE; - } - UINT64 currentTime; NdisGetCurrentSystemTime((LARGE_INTEGER *)¤tTime); return entry->expiration < currentTime; @@ -346,7 +368,7 @@ OvsDetectCtPacket(OvsForwardingContext *fwdCtx, return NDIS_STATUS_NOT_SUPPORTED; } -static __inline BOOLEAN +BOOLEAN OvsCtKeyAreSame(OVS_CT_KEY ctxKey, OVS_CT_KEY entryKey) { return ((ctxKey.src.addr.ipv4 == entryKey.src.addr.ipv4) && @@ -372,13 +394,14 @@ OvsCtIncrementCounters(POVS_CT_ENTRY entry, BOOLEAN reply, PNET_BUFFER_LIST nbl) } } -static __inline POVS_CT_ENTRY +POVS_CT_ENTRY OvsCtLookup(OvsConntrackKeyLookupCtx *ctx) { PLIST_ENTRY link; POVS_CT_ENTRY entry; BOOLEAN reply = FALSE; POVS_CT_ENTRY found = NULL; + OVS_CT_KEY key = ctx->key; if (!ctTotalEntries) { return found; @@ -387,13 +410,19 @@ OvsCtLookup(OvsConntrackKeyLookupCtx *ctx) LIST_FORALL(&ovsConntrackTable[ctx->hash & CT_HASH_TABLE_MASK], link) { entry = CONTAINING_RECORD(link, OVS_CT_ENTRY, link); - if (OvsCtKeyAreSame(ctx->key,entry->key)) { + if (OvsCtKeyAreSame(key,entry->key)) { found = entry; reply = FALSE; break; } - if (OvsCtKeyAreSame(ctx->key,entry->rev_key)) { + /* Reverse NAT must be performed before OvsCtLookup, so here + * we simply need to flip the src and dst in key and compare + * they are equal. Note that flipped key is not equal to + * rev_key due to NAT effect. + */ + OvsCtKeyReverse(&key); + if (OvsCtKeyAreSame(key, entry->key)) { found = entry; reply = TRUE; break; @@ -412,17 +441,18 @@ OvsCtLookup(OvsConntrackKeyLookupCtx *ctx) return found; } -static __inline UINT32 -OvsExtractLookupCtxHash(OvsConntrackKeyLookupCtx *ctx) +UINT32 +OvsHashCtKey(const OVS_CT_KEY *key) { - UINT32 hsrc, hdst,hash; - hsrc = OvsJhashBytes((UINT32*) &ctx->key.src, sizeof(ctx->key.src), 0); - hdst = OvsJhashBytes((UINT32*) &ctx->key.dst, sizeof(ctx->key.dst), 0); + UINT32 hsrc, hdst, hash; + hsrc = OvsJhashBytes((UINT32*) &key->src, sizeof(key->src), 0); + hdst = OvsJhashBytes((UINT32*) &key->dst, sizeof(key->dst), 0); hash = hsrc ^ hdst; /* TO identify reverse traffic */ - return OvsJhashBytes((uint32_t *) &ctx->key.dst + 1, - ((uint32_t *) (&ctx->key + 1) - - (uint32_t *) (&ctx->key.dst + 1)), + hash = OvsJhashBytes((uint32_t *) &key->dst + 1, + ((uint32_t *) (key + 1) - + (uint32_t *) (&key->dst + 1)), hash); + return hash; } static UINT8 @@ -453,6 +483,7 @@ OvsCtSetupLookupCtx(OvsFlowKey *flowKey, PNET_BUFFER_LIST curNbl, UINT32 l4Offset) { + const OVS_NAT_ENTRY *natEntry; ctx->key.zone = zone; ctx->key.dl_type = flowKey->l2.dlType; ctx->related = FALSE; @@ -514,7 +545,14 @@ OvsCtSetupLookupCtx(OvsFlowKey *flowKey, return NDIS_STATUS_INVALID_PACKET; } - ctx->hash = OvsExtractLookupCtxHash(ctx); + natEntry = OvsNatLookup(&ctx->key, TRUE); + if (natEntry) { + /* Translate address first for reverse NAT */ + ctx->key = natEntry->ctEntry->key; + OvsCtKeyReverse(&ctx->key); + } + + ctx->hash = OvsHashCtKey(&ctx->key); return NDIS_STATUS_SUCCESS; } @@ -532,17 +570,19 @@ OvsDetectFtpPacket(OvsFlowKey *key) { *---------------------------------------------------------------------------- */ static __inline POVS_CT_ENTRY -OvsProcessConntrackEntry(PNET_BUFFER_LIST curNbl, +OvsProcessConntrackEntry(OvsForwardingContext *fwdCtx, UINT32 l4Offset, OvsConntrackKeyLookupCtx *ctx, OvsFlowKey *key, UINT16 zone, + NAT_ACTION_INFO *natInfo, BOOLEAN commit, UINT64 currentTime, BOOLEAN *entryCreated) { POVS_CT_ENTRY entry = ctx->entry; UINT32 state = 0; + PNET_BUFFER_LIST curNbl = fwdCtx->curNbl; *entryCreated = FALSE; /* If an entry was found, update the state based on TCP flags */ @@ -569,8 +609,8 @@ OvsProcessConntrackEntry(PNET_BUFFER_LIST curNbl, //Delete and update the Conntrack OvsCtEntryDelete(ctx->entry); ctx->entry = NULL; - entry = OvsCtEntryCreate(curNbl, key->ipKey.nwProto, l4Offset, - ctx, key, commit, currentTime, + entry = OvsCtEntryCreate(fwdCtx, key->ipKey.nwProto, l4Offset, + ctx, key, natInfo, commit, currentTime, entryCreated); if (!entry) { return NULL; @@ -637,7 +677,7 @@ OvsConntrackSetLabels(OvsFlowKey *key, } static __inline NDIS_STATUS -OvsCtExecute_(PNET_BUFFER_LIST curNbl, +OvsCtExecute_(OvsForwardingContext *fwdCtx, OvsFlowKey *key, OVS_PACKET_HDR_INFO *layers, BOOLEAN commit, @@ -650,13 +690,12 @@ OvsCtExecute_(PNET_BUFFER_LIST curNbl, { NDIS_STATUS status = NDIS_STATUS_SUCCESS; POVS_CT_ENTRY entry = NULL; + PNET_BUFFER_LIST curNbl = fwdCtx->curNbl; OvsConntrackKeyLookupCtx ctx = { 0 }; LOCK_STATE_EX lockState; UINT64 currentTime; NdisGetCurrentSystemTime((LARGE_INTEGER *) ¤tTime); - /* XXX: Not referenced for now */ - UNREFERENCED_PARAMETER(natInfo); /* Retrieve the Conntrack Key related fields from packet */ OvsCtSetupLookupCtx(key, zone, &ctx, curNbl, layers->l4Offset); @@ -675,18 +714,31 @@ OvsCtExecute_(PNET_BUFFER_LIST curNbl, if (!entry) { /* If no matching entry was found, create one and add New state */ - entry = OvsCtEntryCreate(curNbl, key->ipKey.nwProto, + entry = OvsCtEntryCreate(fwdCtx, key->ipKey.nwProto, layers->l4Offset, &ctx, - key, commit, currentTime, + key, natInfo, commit, currentTime, &entryCreated); } else { /* Process the entry and update CT flags */ OvsCtIncrementCounters(entry, ctx.reply, curNbl); - entry = OvsProcessConntrackEntry(curNbl, layers->l4Offset, &ctx, key, - zone, commit, currentTime, + entry = OvsProcessConntrackEntry(fwdCtx, layers->l4Offset, &ctx, key, + zone, natInfo, commit, currentTime, &entryCreated); } + /* + * Note that natInfo is not the same as entry->natInfo here. natInfo + * is decided by action in the openflow rule, entry->natInfo is decided + * when the entry is created. In the reverse NAT case, natInfo is + * NAT_ACTION_REVERSE, yet entry->natInfo is NAT_ACTION_SRC or + * NAT_ACTION_DST without NAT_ACTION_REVERSE + */ + if (entry && natInfo->natAction != NAT_ACTION_NONE) + { + OvsNatPacket(fwdCtx, entry, entry->natInfo.natAction, + key, ctx.reply); + } + if (entry && mark) { OvsConntrackSetMark(key, entry, mark->value, mark->mask); } @@ -735,10 +787,8 @@ OvsExecuteConntrackAction(OvsForwardingContext *fwdCtx, MD_LABELS *labels = NULL; PCHAR helper = NULL; NAT_ACTION_INFO natActionInfo; - PNET_BUFFER_LIST curNbl = fwdCtx->curNbl; OVS_PACKET_HDR_INFO *layers = &fwdCtx->layers; PNET_BUFFER_LIST newNbl = NULL; - NAT_ACTION_INFO natActionInfo; NDIS_STATUS status; memset(&natActionInfo, 0, sizeof natActionInfo); @@ -775,12 +825,12 @@ OvsExecuteConntrackAction(OvsForwardingContext *fwdCtx, BOOLEAN hasMaxIp = FALSE; BOOLEAN hasMaxPort = FALSE; NL_NESTED_FOR_EACH_UNSAFE (natAttr, left, ctAttr) { - enum ovs_nat_attr sub_type_nest = NlAttrType(natAttr); - switch(sub_type_nest) { + enum ovs_nat_attr subtype = NlAttrType(natAttr); + switch(subtype) { case OVS_NAT_ATTR_SRC: case OVS_NAT_ATTR_DST: natActionInfo.natAction |= - ((sub_type_nest == OVS_NAT_ATTR_SRC) + ((subtype == OVS_NAT_ATTR_SRC) ? NAT_ACTION_SRC : NAT_ACTION_DST); break; case OVS_NAT_ATTR_IP_MIN: @@ -844,19 +894,19 @@ OvsExecuteConntrackAction(OvsForwardingContext *fwdCtx, commit = TRUE; } /* If newNbl is not allocated, use the current Nbl*/ - status = OvsCtExecute_(newNbl != NULL ? newNbl : curNbl, key, layers, + status = OvsCtExecute_(fwdCtx, key, layers, commit, force, zone, mark, labels, helper, &natActionInfo); return status; } /* *---------------------------------------------------------------------------- - * ovsConntrackEntryCleaner + * OvsConntrackEntryCleaner * Runs periodically and cleans up the connection tracker *---------------------------------------------------------------------------- */ VOID -ovsConntrackEntryCleaner(PVOID data) +OvsConntrackEntryCleaner(PVOID data) { POVS_CT_THREAD_CTX context = (POVS_CT_THREAD_CTX)data; @@ -873,15 +923,13 @@ ovsConntrackEntryCleaner(PVOID data) } /* Set the timeout for the thread and cleanup */ - UINT64 currentTime, threadSleepTimeout; - NdisGetCurrentSystemTime((LARGE_INTEGER *)¤tTime); - threadSleepTimeout = currentTime + CT_CLEANUP_INTERVAL; + INT64 threadSleepTimeout = -CT_CLEANUP_INTERVAL; if (ctTotalEntries) { for (int i = 0; i < CT_HASH_TABLE_SIZE; i++) { LIST_FORALL_SAFE(&ovsConntrackTable[i], link, next) { entry = CONTAINING_RECORD(link, OVS_CT_ENTRY, link); - if (entry->expiration < currentTime) { + if (entry && OvsCtEntryExpired(entry)) { OvsCtEntryDelete(entry); } } @@ -921,6 +969,7 @@ OvsCtFlush(UINT16 zone) } } + OvsNatFlush(zone); NdisReleaseRWLock(ovsConntrackLockObj, &lockState); return NDIS_STATUS_SUCCESS; } diff --git a/datapath-windows/ovsext/Conntrack.h b/datapath-windows/ovsext/Conntrack.h index 1ad289f..fcdd96e 100644 --- a/datapath-windows/ovsext/Conntrack.h +++ b/datapath-windows/ovsext/Conntrack.h @@ -18,8 +18,9 @@ #define __OVS_CONNTRACK_H_ 1 #include "precomp.h" -#include "Flow.h" +#include "Actions.h" #include "Debug.h" +#include "Flow.h" #include "Actions.h" #include @@ -87,6 +88,14 @@ typedef struct _OVS_CT_KEY { UINT64 byteCount; } OVS_CT_KEY, *POVS_CT_KEY; +typedef struct _NAT_ACTION_INFO { + struct ct_addr minAddr; + struct ct_addr maxAddr; + uint16_t minPort; + uint16_t maxPort; + uint16_t natAction; +} NAT_ACTION_INFO, *PNAT_ACTION_INFO; + typedef struct OVS_CT_ENTRY { OVS_CT_KEY key; OVS_CT_KEY rev_key; @@ -95,6 +104,7 @@ typedef struct OVS_CT_ENTRY { UINT32 mark; UINT64 timestampStart; struct ovs_key_ct_labels labels; + NAT_ACTION_INFO natInfo; PVOID parent; /* Points to main connection */ } OVS_CT_ENTRY, *POVS_CT_ENTRY; @@ -119,14 +129,6 @@ typedef struct OvsConntrackKeyLookupCtx { BOOLEAN related; } OvsConntrackKeyLookupCtx; -typedef struct _NAT_ACTION_INFO { - struct ct_addr minAddr; - struct ct_addr maxAddr; - uint16_t minPort; - uint16_t maxPort; - uint16_t natAction; -} NAT_ACTION_INFO, *PNAT_ACTION_INFO; - #define CT_HASH_TABLE_SIZE ((UINT32)1 << 10) #define CT_HASH_TABLE_MASK (CT_HASH_TABLE_SIZE - 1) #define CT_INTERVAL_SEC 10000000LL //1s @@ -225,4 +227,9 @@ NDIS_STATUS OvsCtHandleFtp(PNET_BUFFER_LIST curNbl, POVS_CT_ENTRY entry, BOOLEAN request); +UINT32 OvsHashCtKey(const OVS_CT_KEY *key); +BOOLEAN OvsCtKeyAreSame(OVS_CT_KEY ctxKey, OVS_CT_KEY entryKey); +POVS_CT_ENTRY OvsCtLookup(OvsConntrackKeyLookupCtx *ctx); + + #endif /* __OVS_CONNTRACK_H_ */ diff --git a/datapath-windows/ovsext/ovsext.vcxproj b/datapath-windows/ovsext/ovsext.vcxproj index ecfc0b8..69dfb56 100644 --- a/datapath-windows/ovsext/ovsext.vcxproj +++ b/datapath-windows/ovsext/ovsext.vcxproj @@ -103,6 +103,7 @@ + @@ -257,6 +258,7 @@ +