@@ -620,6 +620,154 @@ OvsDoFlowLookupOutput(OvsForwardingContext* ovsFwdCtx)
return status;
}
+VOID /* Callback handed to OvsFwdIPHelperRequest by the tunnel encap paths; patches and forwards the queued NBL. */
+OvsEncapPktCB(PNET_BUFFER_LIST nbl, /* encapsulated packet to finish; NULL means nothing to forward */
+ UINT32 inPort, /* unused here */
+ PVOID tunnelKey, /* used as an OvsIPTunnelKey * */
+ PVOID cbData1, /* used as the POVS_SWITCH_CONTEXT */
+ PVOID cbData2, /* unused here */
+ NTSTATUS status, /* anything other than STATUS_SUCCESS causes the NBL to be completed, not sent */
+ POVS_FWD_INFO fwdInfo) /* source/destination MAC and IP addresses plus the output vport */
+{
+ POVS_SWITCH_CONTEXT switchContext = (POVS_SWITCH_CONTEXT)cbData1;
+ OvsIPTunnelKey *tunKey = (OvsIPTunnelKey *)tunnelKey;
+ OvsForwardingContext ovsFwdCtx = { 0 };
+ BOOLEAN isDispatchLevel = KeGetCurrentIrql() == DISPATCH_LEVEL; /* selects the lock-acquire flavor below */
+ LOCK_STATE_EX lockState, dpLockState; /* lockState: dispatchLock; dpLockState: datapath lock */
+ PNET_BUFFER curNb;
+ char ipAddrStr[64] = { 0 }; /* textual IP address, used only for logging */
+
+ UNREFERENCED_PARAMETER(inPort);
+ UNREFERENCED_PARAMETER(cbData2);
+
+ if (fwdInfo->dstIphAddr.si_family == AF_INET) { /* NOTE(review): fwdInfo is dereferenced unconditionally - confirm callers never pass NULL */
+ RtlIpv4AddressToStringA(&fwdInfo->dstIphAddr.Ipv4.sin_addr,
+ ipAddrStr);
+ } else if (fwdInfo->dstIphAddr.si_family == AF_INET6) {
+ RtlIpv6AddressToStringA(&fwdInfo->dstIphAddr.Ipv6.sin6_addr,
+ ipAddrStr);
+ } /* any other family leaves ipAddrStr as an empty string */
+ OVS_LOG_INFO("Resolve IP %s MAC %02x:%02x:%02x:%02x:%02x:%02x "
+ "status %x, nbl %p, vport %p", ipAddrStr,
+ fwdInfo->dstMacAddr[0], fwdInfo->dstMacAddr[1],
+ fwdInfo->dstMacAddr[2], fwdInfo->dstMacAddr[3],
+ fwdInfo->dstMacAddr[4], fwdInfo->dstMacAddr[5],
+ status, nbl, fwdInfo->vport);
+
+ if (!nbl) { /* no packet was queued with the request: only the log line above is produced */
+ return;
+ }
+
+ /* XXX - switchContext should not be released (this callback holds no reference of its own to it - TODO confirm lifetime) */
+ if (isDispatchLevel) {
+ NdisAcquireRWLockRead(switchContext->dispatchLock, &lockState,
+ NDIS_RWL_AT_DISPATCH_LEVEL);
+ } else {
+ NdisAcquireRWLockRead(switchContext->dispatchLock, &lockState, 0);
+ }
+ if (fwdInfo->vport == NULL || status != STATUS_SUCCESS) { /* no output vport or failed status: complete the NBL instead of sending it */
+ goto unlock_free_out;
+ }
+ ASSERT(OvsIphAddrEquals(&tunKey->dst, &fwdInfo->dstIphAddr));
+ ASSERT(OvsIphAddrEquals(&tunKey->src, &fwdInfo->srcIphAddr) ||
+ OvsIphIsZero(&tunKey->src));
+
+ /* Patch the outer headers of every NET_BUFFER in the NBL with the addresses in fwdInfo */
+ for (curNb = NET_BUFFER_LIST_FIRST_NB(nbl); curNb != NULL;
+ curNb = curNb->Next) {
+ EthHdr *ethHdr;
+ PMDL curMdl = NET_BUFFER_CURRENT_MDL(curNb);
+ PUINT8 bufferStart = (PUINT8)OvsGetMdlWithLowPriority(curMdl);
+ if (!bufferStart) { /* could not get an address for the current MDL */
+ status = NDIS_STATUS_RESOURCES; /* NOTE(review): status is not read again after this assignment */
+ OVS_LOG_ERROR("nbl %p nb %p buffer error", nbl, curNb);
+ goto unlock_free_out;
+ }
+
+ bufferStart += NET_BUFFER_CURRENT_MDL_OFFSET(curNb); /* skip to the first used byte of the current MDL */
+ if (NET_BUFFER_NEXT_NB(curNb)) { /* trace only: lengths of this NB and the following one */
+ OVS_LOG_TRACE("nbl %p nb %p length %u next %u", nbl, curNb,
+ NET_BUFFER_DATA_LENGTH(curNb),
+ NET_BUFFER_DATA_LENGTH(curNb->Next));
+ }
+
+ /* L2 header: the Ethernet header is read at bufferStart; both MAC addresses come from fwdInfo */
+ ethHdr = (EthHdr *)bufferStart;
+ NdisMoveMemory(ethHdr->Destination, fwdInfo->dstMacAddr,
+ sizeof ethHdr->Destination);
+ NdisMoveMemory(ethHdr->Source, fwdInfo->srcMacAddr,
+ sizeof ethHdr->Source);
+ OVS_LOG_INFO("nbl %p nb %p flags %x", nbl, curNb, tunKey->flags);
+ if (tunKey->flags & OVS_TNL_F_CSUM) { /* L3/L4 fix-ups are applied only when the tunnel key has the CSUM flag set */
+ if (ethHdr->Type == htons(ETH_TYPE_IPV4)) {
+ IPHdr *ipHdr;
+ /* IP header: read immediately after the Ethernet header */
+ ipHdr = (IPHdr *)((PCHAR)ethHdr + sizeof *ethHdr);
+ if (ipHdr->saddr == 0) { /* source address still zero: fill it from fwdInfo */
+ UDPHdr *udpHdr;
+
+ ipHdr->saddr = fwdInfo->srcIphAddr.Ipv4.sin_addr.s_addr;
+ /* UDP header: located sizeof *ipHdr bytes after the IP header start, i.e. IP options are not accounted for */
+ udpHdr = (UDPHdr *)((PCHAR)ipHdr + sizeof *ipHdr);
+ OVS_LOG_INFO("nbl %p nb %p len %u src %d.%d.%d.%d, csum %u",
+ nbl, curNb, NET_BUFFER_DATA_LENGTH(curNb),
+ ipHdr->saddr & 0xff, (ipHdr->saddr >> 8) & 0xff,
+ (ipHdr->saddr >> 16) & 0xff,
+ (ipHdr->saddr >> 24) & 0xff, udpHdr->check);
+
+ /* Update checksum: presumably an incremental fix-up for saddr changing from 0 to its new value - confirm ChecksumUpdate32 semantics */
+ ASSERT(udpHdr->check);
+ udpHdr->check = ChecksumUpdate32(udpHdr->check, 0, ipHdr->saddr);
+ }
+ } else if (ethHdr->Type == htons(ETH_TYPE_IPV6)) {
+ IPv6Hdr *ipv6Hdr;
+ UINT32 *srcIpv6Addr; /* the IPv6 source address viewed as four 32-bit words */
+ /* IP header: read immediately after the Ethernet header */
+ ipv6Hdr = (IPv6Hdr *)((PCHAR)ethHdr + sizeof *ethHdr);
+ srcIpv6Addr = (UINT32 *)&ipv6Hdr->saddr;
+ if (srcIpv6Addr[0] == 0 && srcIpv6Addr[1] == 0 &&
+ srcIpv6Addr[2] == 0 && srcIpv6Addr[3] == 0) { /* all-zero source address: fill it from fwdInfo */
+ UDPHdr *udpHdr;
+ UINT16 udpChksumLen = 0;
+
+ RtlCopyMemory(&ipv6Hdr->saddr,
+ &fwdInfo->srcIphAddr.Ipv6.sin6_addr,
+ sizeof(ipv6Hdr->saddr));
+ /* UDP header: located sizeof *ipv6Hdr bytes after the IPv6 header start, i.e. extension headers are not accounted for */
+ udpHdr = (UDPHdr *)((PCHAR)ipv6Hdr + sizeof *ipv6Hdr);
+ RtlIpv6AddressToStringA(&fwdInfo->srcIphAddr.Ipv6.sin6_addr,
+ ipAddrStr); /* ipAddrStr is reused, now holding the source address for the log line below */
+ OVS_LOG_INFO("nbl %p nb %p len %u src %s, csum %u",
+ nbl, curNb, NET_BUFFER_DATA_LENGTH(curNb),
+ ipAddrStr, udpHdr->check);
+
+ udpChksumLen = (UINT16) NET_BUFFER_DATA_LENGTH(curNb) -
+ sizeof *ipv6Hdr - sizeof *ethHdr; /* NB data length minus the IPv6 and Ethernet header sizes */
+ udpHdr->check = IPv6PseudoChecksum((UINT32*)&ipv6Hdr->saddr,
+ (UINT32*)&ipv6Hdr->daddr,
+ IPPROTO_UDP, udpChksumLen); /* replaces the old value with the pseudo-header checksum; presumably completed later by checksum offload - confirm */
+ }
+ }
+ }
+ }
+
+ OvsAcquireDatapathRead(&switchContext->datapath, &dpLockState,
+ isDispatchLevel); /* taken while dispatchLock is still held; released first below */
+ OvsInitForwardingCtx(&ovsFwdCtx, switchContext, nbl,
+ fwdInfo->vport->portNo, 0,
+ NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(nbl),
+ NULL, &ovsFwdCtx.layers, TRUE); /* NOTE(review): any result of OvsInitForwardingCtx is ignored here - confirm it cannot fail on this path */
+ OvsDoFlowLookupOutput(&ovsFwdCtx); /* nbl is not completed here afterwards, so it is presumably consumed by this call - confirm */
+ OvsReleaseDatapath(&switchContext->datapath, &dpLockState);
+ NdisReleaseRWLock(switchContext->dispatchLock, &lockState);
+
+ return;
+
+unlock_free_out: /* failure path: complete the NBL, then drop dispatchLock (the datapath lock is not held here) */
+ OvsCompleteNBL(switchContext, nbl, TRUE);
+ NdisReleaseRWLock(switchContext->dispatchLock, &lockState);
+}
+
/*
* --------------------------------------------------------------------------
* OvsTunnelPortTx --
@@ -20,6 +20,7 @@
#include "Switch.h"
#include "PacketIO.h"
+typedef union _OVS_FWD_INFO *POVS_FWD_INFO;
/*
* There a lot of data that needs to be maintained while executing the pipeline
@@ -138,4 +139,12 @@ OvsUpdateAddressAndPortForIpv6(OvsForwardingContext *ovsFwdCtx,
struct in6_addr newAddr, UINT16 newPort,
BOOLEAN isSource, BOOLEAN isTx);
+VOID /* Callback passed to OvsFwdIPHelperRequest by the Geneve and VXLAN encap paths. */
+OvsEncapPktCB(PNET_BUFFER_LIST nbl, /* encapsulated packet; the implementation returns early when NULL */
+ UINT32 inPort, /* unreferenced by the implementation */
+ PVOID tunnelKey, /* cast to OvsIPTunnelKey * by the implementation */
+ PVOID cbData1, /* cast to POVS_SWITCH_CONTEXT by the implementation */
+ PVOID cbData2, /* unreferenced by the implementation */
+ NTSTATUS status, /* the packet is forwarded only when this is STATUS_SUCCESS */
+ POVS_FWD_INFO fwdInfo); /* MAC/IP addresses and output vport applied to the packet */
#endif /* __ACTIONS_H_ */
@@ -16,6 +16,7 @@
#include "precomp.h"
+#include "Actions.h"
#include "Atomic.h"
#include "Debug.h"
#include "Flow.h"
@@ -92,6 +93,7 @@ NDIS_STATUS OvsEncapGeneve(POVS_VPORT_ENTRY vport,
UINT32 packetLength;
ULONG mss = 0;
NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo;
+ BOOLEAN firstPkt = FALSE;
if (tunKey->dst.si_family == AF_INET) {
headRoom = OvsGetGeneveTunHdrMinSize() + tunKey->tunOptLen;
@@ -101,19 +103,18 @@ NDIS_STATUS OvsEncapGeneve(POVS_VPORT_ENTRY vport,
}
status = OvsLookupIPhFwdInfo(tunKey->src, tunKey->dst, &fwdInfo);
- if (status != STATUS_SUCCESS) {
- OvsFwdIPHelperRequest(NULL, 0, tunKey, NULL, NULL, NULL);
- // return NDIS_STATUS_PENDING;
- /*
- * XXX: Don't know if the completionList will make any sense when
- * accessed in the callback. Make sure the caveats are known.
- *
- * XXX: This code will work once we are able to grab locks in the
- * callback.
- */
- return NDIS_STATUS_FAILURE;
+ /*
+ * Only the first packet is supported; if more packets arrive before
+ * FwdInfo is learned, drop them.
+ */
+ if (status == STATUS_NOT_FOUND) {
+ firstPkt = TRUE;
+ } else if (fwdInfo.vport == NULL) {
+ return NDIS_STATUS_PENDING;
+ } else {
+ RtlCopyMemory(switchFwdInfo->value, fwdInfo.value,
+ sizeof fwdInfo.value);
}
- RtlCopyMemory(switchFwdInfo->value, fwdInfo.value, sizeof fwdInfo.value);
curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
packetLength = NET_BUFFER_DATA_LENGTH(curNb);
@@ -141,8 +142,13 @@ NDIS_STATUS OvsEncapGeneve(POVS_VPORT_ENTRY vport,
/* If we didn't split the packet above, make a copy now */
if (*newNbl == NULL) {
- *newNbl = OvsPartialCopyNBL(switchContext, curNbl, 0, headRoom,
- FALSE /*NBL info*/);
+ if (firstPkt == TRUE) {
+ *newNbl = OvsFullCopyNBL(switchContext, curNbl, headRoom,
+ FALSE /*NBL info*/);
+ } else {
+ *newNbl = OvsPartialCopyNBL(switchContext, curNbl, 0, headRoom,
+ FALSE /*NBL info*/);
+ }
if (*newNbl == NULL) {
OVS_LOG_ERROR("Unable to copy NBL");
return NDIS_STATUS_FAILURE;
@@ -180,11 +186,12 @@ NDIS_STATUS OvsEncapGeneve(POVS_VPORT_ENTRY vport,
/* L2 header */
ethHdr = (EthHdr *)bufferStart;
- NdisMoveMemory(ethHdr->Destination, fwdInfo.dstMacAddr,
- sizeof ethHdr->Destination);
- NdisMoveMemory(ethHdr->Source, fwdInfo.srcMacAddr,
- sizeof ethHdr->Source);
-
+ if (firstPkt == FALSE) {
+ NdisMoveMemory(ethHdr->Destination, fwdInfo.dstMacAddr,
+ sizeof ethHdr->Destination);
+ NdisMoveMemory(ethHdr->Source, fwdInfo.srcMacAddr,
+ sizeof ethHdr->Source);
+ }
if (tunKey->dst.si_family == AF_INET) {
ethHdr->Type = htons(ETH_TYPE_IPV4);
} else if (tunKey->dst.si_family == AF_INET6) {
@@ -205,10 +212,8 @@ NDIS_STATUS OvsEncapGeneve(POVS_VPORT_ENTRY vport,
IP_DF_NBO : 0;
ipHdr->ttl = tunKey->ttl ? tunKey->ttl : GENEVE_DEFAULT_TTL;
ipHdr->protocol = IPPROTO_UDP;
- ASSERT(OvsIphAddrEquals(&tunKey->dst, &fwdInfo.dstIphAddr));
- ASSERT(OvsIphAddrEquals(&tunKey->src, &fwdInfo.srcIphAddr) || OvsIphIsZero(&tunKey->src));
- ipHdr->saddr = fwdInfo.srcIphAddr.Ipv4.sin_addr.s_addr;
- ipHdr->daddr = fwdInfo.dstIphAddr.Ipv4.sin_addr.s_addr;
+ ipHdr->saddr = tunKey->src.Ipv4.sin_addr.s_addr;
+ ipHdr->daddr = tunKey->dst.Ipv4.sin_addr.s_addr;
ipHdr->check = 0;
} else if (tunKey->dst.si_family == AF_INET6) {
/* IPv6 header */
@@ -222,11 +227,9 @@ NDIS_STATUS OvsEncapGeneve(POVS_VPORT_ENTRY vport,
ipv6Hdr->payload_len = htons(NET_BUFFER_DATA_LENGTH(curNb) - sizeof *ethHdr - sizeof *ipv6Hdr);
ipv6Hdr->hop_limit = tunKey->ttl ? tunKey->ttl : GENEVE_DEFAULT_TTL;
ipv6Hdr->nexthdr = IPPROTO_UDP;
- ASSERT(OvsIphAddrEquals(&(tunKey->dst), &(fwdInfo.dstIphAddr)));
- ASSERT(OvsIphAddrEquals(&(tunKey->src), &(fwdInfo.srcIphAddr)) || OvsIphIsZero(&(tunKey->src)));
- RtlCopyMemory(&ipv6Hdr->saddr, &fwdInfo.srcIphAddr.Ipv6.sin6_addr,
+ RtlCopyMemory(&ipv6Hdr->saddr, &tunKey->src.Ipv6.sin6_addr,
sizeof(ipv6Hdr->saddr));
- RtlCopyMemory(&ipv6Hdr->daddr, &fwdInfo.dstIphAddr.Ipv6.sin6_addr,
+ RtlCopyMemory(&ipv6Hdr->daddr, &tunKey->dst.Ipv6.sin6_addr,
sizeof(ipv6Hdr->daddr));
}
@@ -294,7 +297,12 @@ NDIS_STATUS OvsEncapGeneve(POVS_VPORT_ENTRY vport,
NET_BUFFER_LIST_INFO(curNbl,
TcpIpChecksumNetBufferListInfo) = csumInfo.Value;
}
-
+ if (firstPkt == TRUE) {
+ OvsFwdIPHelperRequest(*newNbl, 0, tunKey, OvsEncapPktCB,
+ switchContext, NULL);
+ *newNbl = NULL;
+ return NDIS_STATUS_PENDING;
+ }
return STATUS_SUCCESS;
ret_error:
@@ -16,6 +16,7 @@
#include "precomp.h"
+#include "Actions.h"
#include "Atomic.h"
#include "Debug.h"
#include "Flow.h"
@@ -177,7 +178,8 @@ OvsDoEncapVxlan(POVS_VPORT_ENTRY vport,
POVS_FWD_INFO fwdInfo,
POVS_PACKET_HDR_INFO layers,
POVS_SWITCH_CONTEXT switchContext,
- PNET_BUFFER_LIST *newNbl)
+ PNET_BUFFER_LIST *newNbl,
+ BOOLEAN firstPkt)
{
NDIS_STATUS status;
PNET_BUFFER curNb;
@@ -194,9 +196,6 @@ OvsDoEncapVxlan(POVS_VPORT_ENTRY vport,
UINT32 headRoom =
OvsGetVxlanTunHdrSize(fwdInfo->dstIphAddr.si_family == AF_INET ?
TRUE : FALSE);
- ASSERT(OvsIphAddrEquals(&tunKey->dst, &fwdInfo->dstIphAddr));
- ASSERT(OvsIphAddrEquals(&tunKey->src, &fwdInfo->srcIphAddr) ||
- OvsIphIsZero(&tunKey->src));
curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
packetLength = NET_BUFFER_DATA_LENGTH(curNb);
@@ -224,8 +223,13 @@ OvsDoEncapVxlan(POVS_VPORT_ENTRY vport,
/* If we didn't split the packet above, make a copy now */
if (*newNbl == NULL) {
- *newNbl = OvsPartialCopyNBL(switchContext, curNbl, 0, headRoom,
- FALSE /*NBL info*/);
+ if (firstPkt == TRUE) {
+ *newNbl = OvsFullCopyNBL(switchContext, curNbl, headRoom,
+ FALSE /*NBL info*/);
+ } else {
+ *newNbl = OvsPartialCopyNBL(switchContext, curNbl, 0, headRoom,
+ FALSE /*NBL info*/);
+ }
if (*newNbl == NULL) {
OVS_LOG_ERROR("Unable to copy NBL");
return NDIS_STATUS_FAILURE;
@@ -263,14 +267,16 @@ OvsDoEncapVxlan(POVS_VPORT_ENTRY vport,
/* L2 header */
ethHdr = (EthHdr *)bufferStart;
- NdisMoveMemory(ethHdr->Destination, fwdInfo->dstMacAddr,
- sizeof ethHdr->Destination);
- NdisMoveMemory(ethHdr->Source, fwdInfo->srcMacAddr,
- sizeof ethHdr->Source);
+ if (firstPkt == FALSE) {
+ NdisMoveMemory(ethHdr->Destination, fwdInfo->dstMacAddr,
+ sizeof ethHdr->Destination);
+ NdisMoveMemory(ethHdr->Source, fwdInfo->srcMacAddr,
+ sizeof ethHdr->Source);
+ }
ethHdr->Type = htons(ETH_TYPE_IPV4);
/* IP header */
- if (fwdInfo->dstIphAddr.si_family == AF_INET) {
+ if (tunKey->dst.si_family == AF_INET) {
ipHdr = (IPHdr *)((PCHAR)ethHdr + sizeof *ethHdr);
ipHdr->ihl = sizeof *ipHdr / 4;
@@ -283,12 +289,9 @@ OvsDoEncapVxlan(POVS_VPORT_ENTRY vport,
IP_DF_NBO : 0;
ipHdr->ttl = tunKey->ttl ? tunKey->ttl : VXLAN_DEFAULT_TTL;
ipHdr->protocol = IPPROTO_UDP;
- ASSERT(OvsIphAddrEquals(&tunKey->dst, &fwdInfo->dstIphAddr));
- ASSERT(OvsIphAddrEquals(&tunKey->src, &fwdInfo->srcIphAddr) ||
- OvsIphIsZero(&tunKey->src));
- ipHdr->saddr = fwdInfo->srcIphAddr.Ipv4.sin_addr.s_addr;
- ipHdr->daddr = fwdInfo->dstIphAddr.Ipv4.sin_addr.s_addr;
+ ipHdr->saddr = tunKey->src.Ipv4.sin_addr.s_addr;
+ ipHdr->daddr = tunKey->dst.Ipv4.sin_addr.s_addr;
ipHdr->check = 0;
@@ -323,7 +326,7 @@ OvsDoEncapVxlan(POVS_VPORT_ENTRY vport,
}
csumInfo.Value = 0;
- if (fwdInfo->dstIphAddr.si_family == AF_INET) {
+ if (tunKey->dst.si_family == AF_INET) {
csumInfo.Transmit.IpHeaderChecksum = 1;
csumInfo.Transmit.IsIPv4 = 1;
} else {
@@ -334,7 +337,12 @@ OvsDoEncapVxlan(POVS_VPORT_ENTRY vport,
}
NET_BUFFER_LIST_INFO(curNbl,
TcpIpChecksumNetBufferListInfo) = csumInfo.Value;
-
+ if (firstPkt == TRUE) {
+ OvsFwdIPHelperRequest(*newNbl, 0, tunKey, OvsEncapPktCB,
+ switchContext, NULL);
+ *newNbl = NULL;
+ return NDIS_STATUS_PENDING;
+ }
return STATUS_SUCCESS;
@@ -363,6 +371,7 @@ OvsEncapVxlan(POVS_VPORT_ENTRY vport,
{
NTSTATUS status;
OVS_FWD_INFO fwdInfo;
+ BOOLEAN firstPkt = FALSE;
if (tunKey->dst.si_family != AF_INET) {
/*V6 tunnel support will be supported later*/
@@ -370,22 +379,21 @@ OvsEncapVxlan(POVS_VPORT_ENTRY vport,
}
status = OvsLookupIPhFwdInfo(tunKey->src, tunKey->dst, &fwdInfo);
- if (status != STATUS_SUCCESS) {
- OvsFwdIPHelperRequest(NULL, 0, tunKey, NULL, NULL, NULL);
- /*
- * XXX: Don't know if the completionList will make any sense when
- * accessed in the callback. Make sure the caveats are known.
- *
- * XXX: This code will work once we are able to grab locks in the
- * callback.
- */
- return NDIS_STATUS_FAILURE;
+ /*
+ * Only the first packet is supported; if more packets arrive before
+ * FwdInfo is learned, drop them.
+ */
+ if (status == STATUS_NOT_FOUND) {
+ firstPkt = TRUE;
+ } else if (fwdInfo.vport == NULL) {
+ return NDIS_STATUS_PENDING;
+ } else {
+ RtlCopyMemory(switchFwdInfo->value, fwdInfo.value,
+ sizeof fwdInfo.value);
}
- RtlCopyMemory(switchFwdInfo->value, fwdInfo.value, sizeof fwdInfo.value);
-
return OvsDoEncapVxlan(vport, curNbl, tunKey, &fwdInfo, layers,
- switchContext, newNbl);
+ switchContext, newNbl, firstPkt);
}