@@ -1083,6 +1083,31 @@ nblcopy_error:
return NULL;
}
+/*
+ * --------------------------------------------------------------------------
+ * GetIpHeaderInfo --
+ *
+ *    Computes the combined length of the Ethernet header and the IPv4
+ *    header (including IP options, as given by 'ihl') of the single
+ *    NET_BUFFER in 'curNbl', and stores it in '*hdrSize'.
+ *
+ *    Returns NDIS_STATUS_SUCCESS on success. Returns
+ *    NDIS_STATUS_INVALID_PACKET if the headers cannot be retrieved from the
+ *    NET_BUFFER or if 'ihl' describes a header shorter than IPHdr.
+ * --------------------------------------------------------------------------
+ */
+NDIS_STATUS
+GetIpHeaderInfo(PNET_BUFFER_LIST curNbl,
+                UINT32 *hdrSize)
+{
+    /* Scratch bytes NdisGetDataBuffer may copy into if the headers are not
+     * contiguous in the NET_BUFFER. */
+    CHAR hdrBuf[ETH_HEADER_LENGTH + sizeof(IPHdr)];
+    EthHdr *eth;
+    IPHdr *ipHdr;
+    PNET_BUFFER curNb;
+    UINT32 ipHdrLen;
+
+    curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
+    ASSERT(NET_BUFFER_NEXT_NB(curNb) == NULL);
+
+    /*
+     * Request the Ethernet header together with the fixed part of the IP
+     * header: 'ihl' is read below from the bytes that follow the Ethernet
+     * header, so those bytes must be covered by the same request.
+     */
+    eth = (EthHdr *)NdisGetDataBuffer(curNb, (ULONG)sizeof(hdrBuf),
+                                      (PVOID)hdrBuf, 1, 0);
+    if (eth == NULL) {
+        return NDIS_STATUS_INVALID_PACKET;
+    }
+
+    ipHdr = (IPHdr *)((PCHAR)eth + ETH_HEADER_LENGTH);
+    ipHdrLen = (UINT32)(ipHdr->ihl * 4);
+    if (ipHdrLen < sizeof(IPHdr)) {
+        /* Header length field is smaller than a minimal IPv4 header. */
+        return NDIS_STATUS_INVALID_PACKET;
+    }
+
+    *hdrSize = (UINT32)(ETH_HEADER_LENGTH + ipHdrLen);
+    return NDIS_STATUS_SUCCESS;
+}
+
/*
* --------------------------------------------------------------------------
* GetSegmentHeaderInfo
@@ -1112,15 +1137,16 @@ GetSegmentHeaderInfo(PNET_BUFFER_LIST nbl,
/*
* --------------------------------------------------------------------------
- * FixSegmentHeader
+ * FixPacketHeader
*
- * Fix IP length, IP checksum, TCP sequence number and TCP checksum
- * in the segment.
+ * Fix IP length, Offset, IP checksum, TCP sequence number and TCP checksum
+ * in the netbuffer if applicable.
* --------------------------------------------------------------------------
*/
static NDIS_STATUS
-FixSegmentHeader(PNET_BUFFER nb, UINT16 segmentSize, UINT32 seqNumber,
- BOOLEAN lastPacket, UINT16 packetCounter)
+FixPacketHeader(PNET_BUFFER nb, UINT16 segmentSize, UINT32 seqNumber,
+ BOOLEAN lastPacket, UINT16 packetCounter, UINT16 offset,
+ BOOLEAN isFragment)
{
EthHdr *dstEth = NULL;
TCPHdr *dstTCP = NULL;
@@ -1139,41 +1165,55 @@ FixSegmentHeader(PNET_BUFFER nb, UINT16 segmentSize, UINT32 seqNumber,
case ETH_TYPE_IPV4_NBO:
{
IPHdr *dstIP = NULL;
-
- ASSERT((INT)MmGetMdlByteCount(mdl) - NET_BUFFER_CURRENT_MDL_OFFSET(nb)
+ if (!isFragment) {
+ ASSERT((INT)MmGetMdlByteCount(mdl) - NET_BUFFER_CURRENT_MDL_OFFSET(nb)
>= sizeof(EthHdr) + sizeof(IPHdr) + sizeof(TCPHdr));
- dstIP = (IPHdr *)((PCHAR)dstEth + sizeof(*dstEth));
- dstTCP = (TCPHdr *)((PCHAR)dstIP + dstIP->ihl * 4);
- ASSERT((INT)MmGetMdlByteCount(mdl) - NET_BUFFER_CURRENT_MDL_OFFSET(nb)
+ dstIP = (IPHdr *)((PCHAR)dstEth + sizeof(*dstEth));
+ dstTCP = (TCPHdr *)((PCHAR)dstIP + dstIP->ihl * 4);
+ ASSERT((INT)MmGetMdlByteCount(mdl) - NET_BUFFER_CURRENT_MDL_OFFSET(nb)
>= sizeof(EthHdr) + dstIP->ihl * 4 + TCP_HDR_LEN(dstTCP));
- /* Fix IP length and checksum */
- ASSERT(dstIP->protocol == IPPROTO_TCP);
- dstIP->tot_len = htons(segmentSize + dstIP->ihl * 4 + TCP_HDR_LEN(dstTCP));
- dstIP->id += packetCounter;
+ /* Fix IP length and checksum */
+ ASSERT(dstIP->protocol == IPPROTO_TCP);
+ dstIP->tot_len = htons(segmentSize + dstIP->ihl * 4 + TCP_HDR_LEN(dstTCP));
+ dstIP->id += packetCounter;
+ dstTCP->seq = htonl(seqNumber);
+
+ /*
+ * Set the TCP FIN and PSH bit only for the last packet
+ * More information can be found under:
+ * https://msdn.microsoft.com/en-us/library/windows/hardware/ff568840%28v=vs.85%29.aspx
+ */
+ if (dstTCP->fin) {
+ dstTCP->fin = lastPacket;
+ }
+ if (dstTCP->psh) {
+ dstTCP->psh = lastPacket;
+ }
+ UINT16 csumLength = segmentSize + TCP_HDR_LEN(dstTCP);
+ dstTCP->check = IPPseudoChecksum(&dstIP->saddr,
+ &dstIP->daddr,
+ IPPROTO_TCP,
+ csumLength);
+ dstTCP->check = CalculateChecksumNB(nb,
+ csumLength,
+ sizeof(*dstEth) + dstIP->ihl * 4);
+ } else {
+ ASSERT((INT)MmGetMdlByteCount(mdl) - NET_BUFFER_CURRENT_MDL_OFFSET(nb)
+ >= sizeof(EthHdr) + sizeof(IPHdr));
+
+ dstIP = (IPHdr *)((PCHAR)dstEth + sizeof(*dstEth));
+ ASSERT((INT)MmGetMdlByteCount(mdl) - NET_BUFFER_CURRENT_MDL_OFFSET(nb)
+ >= sizeof(EthHdr) + dstIP->ihl * 4);
+ dstIP->tot_len = htons(segmentSize + dstIP->ihl * 4);
+ if (lastPacket) {
+ dstIP->frag_off = htons(offset & IP_OFFSET);
+ } else {
+ dstIP->frag_off = htons((offset & IP_OFFSET) | IP_MF);
+ }
+ }
dstIP->check = 0;
dstIP->check = IPChecksum((UINT8 *)dstIP, dstIP->ihl * 4, 0);
- dstTCP->seq = htonl(seqNumber);
-
- /*
- * Set the TCP FIN and PSH bit only for the last packet
- * More information can be found under:
- * https://msdn.microsoft.com/en-us/library/windows/hardware/ff568840%28v=vs.85%29.aspx
- */
- if (dstTCP->fin) {
- dstTCP->fin = lastPacket;
- }
- if (dstTCP->psh) {
- dstTCP->psh = lastPacket;
- }
- UINT16 csumLength = segmentSize + TCP_HDR_LEN(dstTCP);
- dstTCP->check = IPPseudoChecksum(&dstIP->saddr,
- &dstIP->daddr,
- IPPROTO_TCP,
- csumLength);
- dstTCP->check = CalculateChecksumNB(nb,
- csumLength,
- sizeof(*dstEth) + dstIP->ihl * 4);
break;
}
case ETH_TYPE_IPV6_NBO:
@@ -1217,11 +1257,29 @@ FixSegmentHeader(PNET_BUFFER nb, UINT16 segmentSize, UINT32 seqNumber,
return STATUS_SUCCESS;
}
+/*
+ * --------------------------------------------------------------------------
+ * OvsTcpSegmentNBL --
+ *
+ *    Thin pass-through to OvsFragmentNBL(): every argument is forwarded
+ *    unchanged and the NBL pointer it produces is handed back to the caller.
+ * --------------------------------------------------------------------------
+ */
+PNET_BUFFER_LIST
+OvsTcpSegmentNBL(PVOID ovsContext,
+                 PNET_BUFFER_LIST nbl,
+                 POVS_PACKET_HDR_INFO hdrInfo,
+                 UINT32 mss,
+                 UINT32 headRoom,
+                 BOOLEAN isIpFragment)
+{
+    PNET_BUFFER_LIST fragmentedNbl;
+
+    fragmentedNbl = OvsFragmentNBL(ovsContext, nbl, hdrInfo, mss, headRoom,
+                                   isIpFragment);
+    return fragmentedNbl;
+}
+
+
/*
* --------------------------------------------------------------------------
- * OvsTcpSegmentNBL --
+ * OvsFragmentNBL --
*
- * Segment TCP payload, and prepend each segment with ether/IP/TCP header.
+ * Fragment NBL payload, and prepend each segment with ether/IP/TCP header.
* Leave headRoom for additional encap.
*
* Please note,
@@ -1234,24 +1292,25 @@ FixSegmentHeader(PNET_BUFFER nb, UINT16 segmentSize, UINT32 seqNumber,
* --------------------------------------------------------------------------
*/
PNET_BUFFER_LIST
-OvsTcpSegmentNBL(PVOID ovsContext,
- PNET_BUFFER_LIST nbl,
- POVS_PACKET_HDR_INFO hdrInfo,
- UINT32 mss,
- UINT32 headRoom)
+OvsFragmentNBL(PVOID ovsContext,
+ PNET_BUFFER_LIST nbl,
+ POVS_PACKET_HDR_INFO hdrInfo,
+ UINT32 mss,
+ UINT32 headRoom,
+ BOOLEAN isIpFragment)
{
POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext;
#ifdef DBG
POVS_NBL_POOL ovsPool = &context->ovsPool;
#endif
POVS_BUFFER_CONTEXT dstCtx, srcCtx;
- UINT32 size, hdrSize, seqNumber;
+ UINT32 size, hdrSize, nblSize, seqNumber = 0;
PNET_BUFFER_LIST newNbl;
PNET_BUFFER nb, newNb;
NDIS_STATUS status;
UINT16 segmentSize;
ULONG copiedSize;
- UINT16 packetCounter = 0;
+ UINT16 offset = 0, packetCounter = 0;
srcCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl);
if (srcCtx == NULL || srcCtx->magic != OVS_CTX_MAGIC) {
@@ -1263,18 +1322,28 @@ OvsTcpSegmentNBL(PVOID ovsContext,
nb = NET_BUFFER_LIST_FIRST_NB(nbl);
ASSERT(NET_BUFFER_NEXT_NB(nb) == NULL);
- /* Figure out the segment header size */
- status = GetSegmentHeaderInfo(nbl, hdrInfo, &hdrSize, &seqNumber);
+ /* Figure out the header size */
+ if (isIpFragment) {
+ status = GetIpHeaderInfo(nbl, &hdrSize);
+ } else {
+ status = GetSegmentHeaderInfo(nbl, hdrInfo, &hdrSize, &seqNumber);
+ }
if (status != NDIS_STATUS_SUCCESS) {
OVS_LOG_INFO("Cannot parse NBL header");
return NULL;
}
-
+ /* Get the NBL size. */
+ if (isIpFragment) {
+ nblSize = mss - hdrSize;
+ } else {
+ nblSize = mss;
+ }
size = NET_BUFFER_DATA_LENGTH(nb) - hdrSize;
/* XXX add to ovsPool counters? */
- newNbl = NdisAllocateFragmentNetBufferList(nbl, NULL,
- NULL, hdrSize, mss, hdrSize + headRoom , 0, 0);
+ newNbl = NdisAllocateFragmentNetBufferList(nbl, NULL, NULL, hdrSize,
+ nblSize, hdrSize + headRoom ,
+ 0, 0);
if (newNbl == NULL) {
return NULL;
}
@@ -1282,7 +1351,7 @@ OvsTcpSegmentNBL(PVOID ovsContext,
/* Now deal with TCP payload */
for (newNb = NET_BUFFER_LIST_FIRST_NB(newNbl); newNb != NULL;
newNb = NET_BUFFER_NEXT_NB(newNb)) {
- segmentSize = (size > mss ? mss : size) & 0xffff;
+ segmentSize = (size > nblSize ? nblSize : size) & 0xffff;
if (headRoom) {
NdisAdvanceNetBufferDataStart(newNb, headRoom, FALSE, NULL);
}
@@ -1294,17 +1363,21 @@ OvsTcpSegmentNBL(PVOID ovsContext,
goto nblcopy_error;
}
- status = FixSegmentHeader(newNb, segmentSize, seqNumber,
- NET_BUFFER_NEXT_NB(newNb) == NULL,
- packetCounter);
+ status = FixPacketHeader(newNb, segmentSize, seqNumber,
+ NET_BUFFER_NEXT_NB(newNb) == NULL,
+ packetCounter, offset, isIpFragment);
+
if (status != NDIS_STATUS_SUCCESS) {
goto nblcopy_error;
}
-
/* Move on to the next segment */
+ if (isIpFragment) {
+ offset += (segmentSize) / 8;
+ } else {
+ seqNumber += segmentSize;
+ }
size -= segmentSize;
- seqNumber += segmentSize;
packetCounter++;
}
@@ -1318,6 +1391,15 @@ OvsTcpSegmentNBL(PVOID ovsContext,
goto nbl_context_error;
}
+ if (isIpFragment) {
+ /* Copy with Flag - NDIS_SWITCH_COPY_NBL_INFO_FLAGS_PRESERVE_DESTINATIONS. */
+ status = context->NdisSwitchHandlers.
+ CopyNetBufferListInfo(context->ovsPool.ndisContext, newNbl, nbl, 1);
+
+ if (status != NDIS_STATUS_SUCCESS) {
+ goto nbl_context_error;
+ }
+ }
newNbl->ParentNetBufferList = nbl;
/* Remember it's a fragment NBL so we can free it properly */
@@ -1339,7 +1421,7 @@ OvsTcpSegmentNBL(PVOID ovsContext,
OvsDumpNetBufferList(newNbl);
OvsDumpForwardingDetails(newNbl);
#endif
- OVS_LOG_TRACE("Segment nbl %p to newNbl: %p", nbl, newNbl);
+ OVS_LOG_TRACE("Fragmnet nbl %p to newNbl: %p", nbl, newNbl);
return newNbl;
nbl_context_error:
@@ -115,7 +115,15 @@ PNET_BUFFER_LIST OvsTcpSegmentNBL(PVOID context,
PNET_BUFFER_LIST nbl,
POVS_PACKET_HDR_INFO hdrInfo,
UINT32 MSS,
- UINT32 headRoom);
+ UINT32 headRoom,
+ BOOLEAN isIpFragment);
+
+PNET_BUFFER_LIST OvsFragmentNBL(PVOID context,
+ PNET_BUFFER_LIST nbl,
+ POVS_PACKET_HDR_INFO hdrInfo,
+ UINT32 MSS,
+ UINT32 headRoom,
+ BOOLEAN isIpFragment);
PNET_BUFFER_LIST OvsAllocateNBLFromBuffer(PVOID context,
PVOID buffer,
@@ -118,7 +118,7 @@ NDIS_STATUS OvsEncapGeneve(POVS_VPORT_ENTRY vport,
if (mss) {
OVS_LOG_TRACE("l4Offset %d", layers->l4Offset);
*newNbl = OvsTcpSegmentNBL(switchContext, curNbl, layers,
- mss, headRoom);
+ mss, headRoom, FALSE);
if (*newNbl == NULL) {
OVS_LOG_ERROR("Unable to segment NBL");
return NDIS_STATUS_FAILURE;
@@ -158,7 +158,7 @@ OvsDoEncapGre(POVS_VPORT_ENTRY vport,
if (mss) {
OVS_LOG_TRACE("l4Offset %d", layers->l4Offset);
*newNbl = OvsTcpSegmentNBL(switchContext, curNbl, layers,
- mss, headRoom);
+ mss, headRoom, FALSE);
if (*newNbl == NULL) {
OVS_LOG_ERROR("Unable to segment NBL");
return NDIS_STATUS_FAILURE;
@@ -185,7 +185,7 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport,
if ((innerFrameLen > OVS_MAX_STT_PACKET_LENGTH) ||
(layers->l4Offset > OVS_MAX_STT_L4_OFFSET_LENGTH)) {
*newNbl = OvsTcpSegmentNBL(switchContext, curNbl, layers,
- mss - headRoom, headRoom);
+ mss - headRoom, headRoom, FALSE);
if (*newNbl == NULL) {
OVS_LOG_ERROR("Unable to segment NBL");
return NDIS_STATUS_FAILURE;
@@ -784,7 +784,7 @@ OvsCreateAndAddPackets(PVOID userData,
if (tsoInfo.LsoV1Transmit.MSS) {
OVS_LOG_TRACE("l4Offset %d", hdrInfo->l4Offset);
newNbl = OvsTcpSegmentNBL(switchContext, nbl, hdrInfo,
- tsoInfo.LsoV1Transmit.MSS , 0);
+ tsoInfo.LsoV1Transmit.MSS , 0, FALSE);
if (newNbl == NULL) {
return NDIS_STATUS_FAILURE;
}
@@ -205,7 +205,7 @@ OvsDoEncapVxlan(POVS_VPORT_ENTRY vport,
if (mss) {
OVS_LOG_TRACE("l4Offset %d", layers->l4Offset);
*newNbl = OvsTcpSegmentNBL(switchContext, curNbl, layers,
- mss, headRoom);
+ mss, headRoom, FALSE);
if (*newNbl == NULL) {
OVS_LOG_ERROR("Unable to segment NBL");
return NDIS_STATUS_FAILURE;
With this patch, OvsTcpSegmentNBL supports fragmenting an NBL not only into TCP segments but also into IPv4 fragments. To reflect the new behavior, the function OvsTcpSegmentNBL was renamed to OvsFragmentNBL, and a wrapper named OvsTcpSegmentNBL was created. v2->v3: - Updated log message and function summary v1->v2: - Fix compile error for release mode. Signed-off-by: Anand Kumar <kumaranand@vmware.com> --- datapath-windows/ovsext/BufferMgmt.c | 194 +++++++++++++++++++++++++---------- datapath-windows/ovsext/BufferMgmt.h | 10 +- datapath-windows/ovsext/Geneve.c | 2 +- datapath-windows/ovsext/Gre.c | 2 +- datapath-windows/ovsext/Stt.c | 2 +- datapath-windows/ovsext/User.c | 2 +- datapath-windows/ovsext/Vxlan.c | 2 +- 7 files changed, 152 insertions(+), 62 deletions(-)