diff mbox

[ovs-dev] datapath-windows: Compute checksums for VXLAN inner packets

Message ID 1442016211924.7460@vmware.com
State Not Applicable
Headers show

Commit Message

Sairam Venugopal Sept. 12, 2015, 12:03 a.m. UTC
Sorry about the bad indents on my comments. I will re-send the review.

-Sairam
diff mbox

Patch

diff --git a/datapath-windows/ovsext/BufferMgmt.c b/datapath-windows/ovsext/BufferMgmt.c
index 3550e20..bf53fc3 100644
--- a/datapath-windows/ovsext/BufferMgmt.c
+++ b/datapath-windows/ovsext/BufferMgmt.c
@@ -1116,7 +1116,8 @@  GetSegmentHeaderInfo(PNET_BUFFER_LIST nbl,
  * --------------------------------------------------------------------------
  */
 static NDIS_STATUS
-FixSegmentHeader(PNET_BUFFER nb, UINT16 segmentSize, UINT32 seqNumber)
+FixSegmentHeader(PNET_BUFFER nb, UINT16 segmentSize, UINT32 seqNumber,
+                 BOOLEAN lastPacket, UINT16 packetCounter)
 {
     EthHdr *dstEth;
     IPHdr *dstIP;
@@ -1141,18 +1142,29 @@  FixSegmentHeader(PNET_BUFFER nb, UINT16 segmentSize, UINT32 seqNumber)
     /* Fix IP length and checksum */
     ASSERT(dstIP->protocol == IPPROTO_TCP);
     dstIP->tot_len = htons(segmentSize + dstIP->ihl * 4 + TCP_HDR_LEN(dstTCP));
+    dstIP->id += packetCounter;
     dstIP->check = 0;
     dstIP->check = IPChecksum((UINT8 *)dstIP, dstIP->ihl * 4, 0);

     /* Fix TCP checksum */
     dstTCP->seq = htonl(seqNumber);
-    dstTCP->check =
-        IPPseudoChecksum((UINT32 *)&dstIP->saddr,
-                         (UINT32 *)&dstIP->daddr,
-                         IPPROTO_TCP, segmentSize + TCP_HDR_LEN(dstTCP));
+
+    if (dstTCP->fin) {
+        dstTCP->fin = lastPacket;
+    }
+    if (dstTCP->psh) {
+        dstTCP->psh = lastPacket;
+    }
+

Get rid of the extra ';' after TCP_HDR_LEN(dstTCP) on the line below (it currently ends in ';;').

+    UINT16 csumLength = segmentSize + TCP_HDR_LEN(dstTCP);;

+    dstTCP->check = IPPseudoChecksum(&dstIP->saddr,
+        &dstIP->daddr,
+        IPPROTO_TCP,
+        csumLength);
     dstTCP->check = CalculateChecksumNB(nb,
-            (UINT16)(NET_BUFFER_DATA_LENGTH(nb) - sizeof *dstEth - dstIP->ihl * 4),
-            sizeof *dstEth + dstIP->ihl * 4);
+        csumLength,
+        sizeof *dstEth + dstIP->ihl * 4);
+
     return STATUS_SUCCESS;
 }

@@ -1190,6 +1202,7 @@  OvsTcpSegmentNBL(PVOID ovsContext,
     NDIS_STATUS status;
     UINT16 segmentSize;
     ULONG copiedSize;
+    UINT16 packetCounter = 0;

     srcCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl);
     if (srcCtx == NULL || srcCtx->magic != OVS_CTX_MAGIC) {
@@ -1232,7 +1245,9 @@  OvsTcpSegmentNBL(PVOID ovsContext,
             goto nblcopy_error;
         }

-        status = FixSegmentHeader(newNb, segmentSize, seqNumber);
+        status = FixSegmentHeader(newNb, segmentSize, seqNumber,
+                                  NET_BUFFER_NEXT_NB(newNb) == NULL,
+                                  packetCounter);
         if (status != NDIS_STATUS_SUCCESS) {
             goto nblcopy_error;
         }
@@ -1241,6 +1256,7 @@  OvsTcpSegmentNBL(PVOID ovsContext,
         /* Move on to the next segment */
         size -= segmentSize;
         seqNumber += segmentSize;
+        packetCounter++;
     }

     status = OvsAllocateNBLContext(context, newNbl);
diff --git a/datapath-windows/ovsext/Checksum.c b/datapath-windows/ovsext/Checksum.c
index 510a094..5d9b035 100644
--- a/datapath-windows/ovsext/Checksum.c
+++ b/datapath-windows/ovsext/Checksum.c

Are there any tests to validate these changes in Checksum.c? If not, can someone else ack it?

@@ -68,34 +68,48 @@  CalculateOnesComplement(UINT8 *start,
 {
     UINT64  sum = 0, val;
     UINT64  *src = (UINT64 *)start;
-    union {
-        UINT32 val;
-        UINT8  b8[4];
-    } tmp;
-
     while (totalLength > 7) {
         val = *src;
-        sum += (val >> 32) + (val & 0xffffffff);
+        sum += val;
+        if (sum < val) sum++;
         src++;
         totalLength -= 8;
     }
+
+    start = (UINT8 *)src;
+
     if (totalLength > 3) {
-        sum += *(UINT32 *)src;
-        src = (UINT64 *)((UINT8 *)src + 4);
+        UINT32 val = *(UINT32 *)start;
+        sum += val;
+        if (sum < val) sum++;
+        start += 4;
         totalLength -= 4;
     }
-    start = (UINT8 *)src;
-    tmp.val = 0;
-    switch (totalLength) {
-    case 3:
-        tmp.b8[2] = start[2];
-    case 2:
-        tmp.b8[1] = start[1];
-    case 1:
-        tmp.b8[0] = start[0];
-        sum += tmp.val;
+
+    if (totalLength > 1) {
+        UINT16 val = *(UINT16 *)start;
+        sum += val;
+        if (sum < val) sum++;
+        start += 2;
+        totalLength -= 2;
     }
-    sum = (isEvenStart ? sum : swap64(sum)) + initial;
+
+    if (totalLength > 0) {
+        UINT8 val = *start;
+        sum += val;
+        if (sum < val) sum++;
+        start += 1;
+        totalLength -= 1;
+    }
+    ASSERT(totalLength == 0);
+
+    if (!isEvenStart) {
+        sum = _byteswap_uint64(sum);
+    }
+
+    sum += initial;
+    if (sum < initial) sum++;
+
     return sum;
 }

@@ -428,6 +442,7 @@  CalculateChecksumNB(const PNET_BUFFER nb,
     ULONG firstMdlLen;
     /* Running count of bytes in remainder of the MDLs including current. */
     ULONG packetLen;
+    BOOLEAN swapEnd = 1 & csumDataLen;

     if ((nb == NULL) || (csumDataLen == 0)
             || (offset >= NET_BUFFER_DATA_LENGTH(nb))
@@ -482,10 +497,8 @@  CalculateChecksumNB(const PNET_BUFFER nb,
     while (csumDataLen && (currentMdl != NULL)) {
         ASSERT(mdlLen < 65536);
         csLen = MIN((UINT16) mdlLen, csumDataLen);
-        //XXX Not handling odd bytes yet.
-        ASSERT(((csLen & 0x1) == 0) || csumDataLen <= mdlLen);

-        csum = CalculateOnesComplement(src, csLen, csum, TRUE);

You can use !swapEnd below.

+        csum = CalculateOnesComplement(src, csLen, csum, !(1 & csumDataLen));
         fold64(csum);

         csumDataLen -= csLen;
@@ -504,9 +517,14 @@  CalculateChecksumNB(const PNET_BUFFER nb,
         }
     }

+    fold64(csum);
     ASSERT(csumDataLen == 0);
     ASSERT((csum & ~0xffff) == 0);
-    return (UINT16) ~csum;
+    csum = (UINT16)~csum;
+    if (swapEnd) {
+        return _byteswap_ushort((UINT16)csum);
+    }
+    return (UINT16)csum;
 }

 /*
diff --git a/datapath-windows/ovsext/Vxlan.c b/datapath-windows/ovsext/Vxlan.c
index 2364f28..a179fbe 100644
--- a/datapath-windows/ovsext/Vxlan.c
+++ b/datapath-windows/ovsext/Vxlan.c
@@ -152,9 +152,9 @@  OvsCleanupVxlanTunnel(PIRP irp,

     if (vxlanPort->filterID != 0) {
         status = OvsTunnelFilterDelete(irp,
-                                      vxlanPort->filterID,
-                                      callback,
-                                      tunnelContext);
+                                       vxlanPort->filterID,
+                                       callback,
+                                       tunnelContext);
     } else {
         OvsFreeMemoryWithTag(vport->priv, OVS_VXLAN_POOL_TAG);
         vport->priv = NULL;
@@ -190,6 +190,9 @@  OvsDoEncapVxlan(POVS_VPORT_ENTRY vport,
     POVS_VXLAN_VPORT vportVxlan;
     UINT32 headRoom = OvsGetVxlanTunHdrSize();
     UINT32 packetLength;
+    NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo;
+    csumInfo.Value = NET_BUFFER_LIST_INFO(curNbl,
+                                          TcpIpChecksumNetBufferListInfo);

     /*
      * XXX: the assumption currently is that the NBL is owned by OVS, and
@@ -198,20 +201,24 @@  OvsDoEncapVxlan(POVS_VPORT_ENTRY vport,
      */
     curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
     packetLength = NET_BUFFER_DATA_LENGTH(curNb);
+
     if (layers->isTcp) {
         NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO tsoInfo;

         tsoInfo.Value = NET_BUFFER_LIST_INFO(curNbl,
                 TcpLargeSendNetBufferListInfo);
-        OVS_LOG_TRACE("MSS %u packet len %u", tsoInfo.LsoV1Transmit.MSS, packetLength);
+        OVS_LOG_TRACE("MSS %u packet len %u", tsoInfo.LsoV1Transmit.MSS,
+                      packetLength);
         if (tsoInfo.LsoV1Transmit.MSS) {
             OVS_LOG_TRACE("l4Offset %d", layers->l4Offset);
             *newNbl = OvsTcpSegmentNBL(switchContext, curNbl, layers,
-                        tsoInfo.LsoV1Transmit.MSS, headRoom);
+                                       tsoInfo.LsoV1Transmit.MSS, headRoom);
             if (*newNbl == NULL) {
                 OVS_LOG_ERROR("Unable to segment NBL");
                 return NDIS_STATUS_FAILURE;
             }
+            /* Clear out LSO flags after this point */
+            NET_BUFFER_LIST_INFO(curNbl, TcpLargeSendNetBufferListInfo) = 0;
         }
     }

@@ -226,6 +233,61 @@  OvsDoEncapVxlan(POVS_VPORT_ENTRY vport,
             OVS_LOG_ERROR("Unable to copy NBL");
             return NDIS_STATUS_FAILURE;
         }
+        /*
+         * To this point we do not have VXLAN offloading.
+         * Apply defined checksums
+         */

Correct me if I am wrong, but this computes the checksum for the inner packet if there is no segmentation.
In case the inner packet is segmented via LSO, shouldn't there be additional checksum calculations for each inner segment?

+        curNb = NET_BUFFER_LIST_FIRST_NB(*newNbl);
+        curMdl = NET_BUFFER_CURRENT_MDL(curNb);
+        bufferStart = (PUINT8)MmGetSystemAddressForMdlSafe(curMdl, LowPagePriority);
+        bufferStart += NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
+
+        if (layers->isIPv4) {
+            IPHdr *ip = (IPHdr *)(bufferStart + layers->l3Offset);
+
+            if (csumInfo.Transmit.IpHeaderChecksum) {
+                ip->check = 0;
+                ip->check = IPChecksum((UINT8 *)ip, 4 * ip->ihl, 0);
+            }
+
+            if (layers->isTcp && csumInfo.Transmit.TcpChecksum) {
+                UINT16 csumLength = (UINT16)(packetLength - layers->l4Offset);
+                TCPHdr *tcp = (TCPHdr *)(bufferStart + layers->l4Offset);
+                tcp->check = IPPseudoChecksum(&ip->saddr, &ip->daddr,
+                                              IPPROTO_TCP, csumLength);
+                tcp->check = CalculateChecksumNB(curNb, csumLength,
+                                                 (UINT32)(layers->l4Offset));
+            }
+            else if (layers->isUdp && csumInfo.Transmit.UdpChecksum) {
+                UINT16 csumLength = (UINT16)(packetLength - layers->l4Offset);
+                UDPHdr *udp = (UDPHdr *)((PCHAR)ip + sizeof *ip);
+                udp->check = IPPseudoChecksum(&ip->saddr, &ip->daddr,
+                                              IPPROTO_UDP, csumLength);
+                udp->check = CalculateChecksumNB(curNb, csumLength,
+                                                 (UINT32)(layers->l4Offset));
+            }
+        } else if (layers->isIPv6) {
+            IPv6Hdr *ip = (IPv6Hdr *)(bufferStart + layers->l3Offset);
+
+            if (layers->isTcp && csumInfo.Transmit.TcpChecksum) {
+                UINT16 csumLength = (UINT16)(packetLength - layers->l4Offset);
+                TCPHdr *tcp = (TCPHdr *)(bufferStart + layers->l4Offset);
+                tcp->check = IPv6PseudoChecksum((UINT32 *) &ip->saddr,
+                                                (UINT32 *) &ip->daddr,
+                                                IPPROTO_TCP, csumLength);
+                tcp->check = CalculateChecksumNB(curNb, csumLength,
+                                                 (UINT32)(layers->l4Offset));
+            }
+            else if (layers->isUdp && csumInfo.Transmit.UdpChecksum) {
+                UINT16 csumLength = (UINT16)(packetLength - layers->l4Offset);
+                UDPHdr *udp = (UDPHdr *)((PCHAR)ip + sizeof *ip);
+                udp->check = IPPseudoChecksum((UINT32 *) &ip->saddr,
+                                              (UINT32 *)&ip->daddr,
+                                              IPPROTO_UDP, csumLength);
+                udp->check = CalculateChecksumNB(curNb, csumLength,
+                                                 (UINT32)(layers->l4Offset));
+            }