@@ -146,19 +146,40 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport,
POVS_STT_VPORT vportStt;
UINT32 headRoom = OvsGetSttTunHdrSize();
UINT32 tcpChksumLen;
+ ULONG mss = 0;
+ ULONG lsoType = 0;
+ ULONG ipVersion = 0;
UNREFERENCED_PARAMETER(layers);
curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
+
+ /* Verify if inner checksum is verified */
+ BOOLEAN innerChecksumVerified = FALSE;
+ NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo;
+ csumInfo.Value = NET_BUFFER_LIST_INFO(curNbl,
+ TcpIpChecksumNetBufferListInfo);
+
+ innerChecksumVerified = csumInfo.Transmit.IpHeaderChecksum == 0 &&
+ csumInfo.Transmit.TcpChecksum == 0 &&
+ csumInfo.Transmit.UdpChecksum == 0;
+
if (layers->isTcp) {
NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO lsoInfo;
lsoInfo.Value = NET_BUFFER_LIST_INFO(curNbl,
- TcpLargeSendNetBufferListInfo);
- if (lsoInfo.LsoV1Transmit.MSS) {
- /* XXX We don't handle LSO yet */
- OVS_LOG_ERROR("LSO on STT is not supported");
- return NDIS_STATUS_FAILURE;
+ TcpLargeSendNetBufferListInfo);
+ lsoType = lsoInfo.Transmit.Type;
+ switch (lsoType) {
+ case NDIS_TCP_LARGE_SEND_OFFLOAD_V1_TYPE:
+ mss = lsoInfo.LsoV1Transmit.MSS;
+ break;
+ case NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE:
+ mss = lsoInfo.LsoV2Transmit.MSS;
+ ipVersion = lsoInfo.LsoV2Transmit.IPVersion;
+ break;
+ default:
+ break;
}
}
@@ -243,7 +264,6 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport,
outerIpHdr->check = 0;
outerIpHdr->saddr = fwdInfo->srcIpAddr;
outerIpHdr->daddr = tunKey->dst;
- outerIpHdr->check = IPChecksum((uint8 *)outerIpHdr, sizeof *outerIpHdr, 0);
/* L4 header */
RtlZeroMemory(outerTcpHdr, sizeof *outerTcpHdr);
@@ -266,6 +286,11 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport,
/* XXX need to peek into the inner packet, hard code for now */
sttHdr->flags = STT_PROTO_IPV4;
+
+ /* XXX need to handle Checksum partial flag */
+ if (innerChecksumVerified) {
+ sttHdr->flags |= STT_CSUM_VERIFIED;
+ }
sttHdr->l4Offset = 0;
sttHdr->reserved = 0;
@@ -276,12 +301,37 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport,
/* Zero out stt padding */
*(uint16 *)(sttHdr + 1) = 0;
- /* Calculate software tcp checksum */
- outerTcpHdr->check = CalculateChecksumNB(curNb, (uint16) tcpChksumLen,
- sizeof(EthHdr) + sizeof(IPHdr));
- if (outerTcpHdr->check == 0) {
- status = NDIS_STATUS_FAILURE;
- goto ret_error;
+ /* Offload IP and TCP checksum */
+ csumInfo.Value = 0;
+ csumInfo.Transmit.IpHeaderChecksum = 1;
+ csumInfo.Transmit.TcpChecksum = 1;
+ csumInfo.Transmit.IsIPv4 = 1;
+ csumInfo.Transmit.TcpHeaderOffset = sizeof *outerEthHdr + sizeof *outerIpHdr;
+ NET_BUFFER_LIST_INFO(curNbl, TcpIpChecksumNetBufferListInfo) = csumInfo.Value;
+
+ /* Offload TCP Segmentation */
+ if (mss) {
+ OVS_LOG_ERROR("lsoInfo.Transmit.Type:%d lsoInfo packet - mss:%d", lsoType, mss);
+
+ NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO lsoInfo;
+ lsoInfo.Value = 0;
+ ULONG tcpHeaderOffset = headRoom + sizeof(EthHdr) + sizeof(IPHdr);
+ lsoInfo.Transmit.Type = lsoType;
+ switch (lsoType) {
+ case NDIS_TCP_LARGE_SEND_OFFLOAD_V1_TYPE:
+ lsoInfo.LsoV1Transmit.MSS = mss;
+ lsoInfo.LsoV1Transmit.TcpHeaderOffset = tcpHeaderOffset;
+ break;
+ case NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE:
+ lsoInfo.LsoV2Transmit.MSS = mss;
+ lsoInfo.LsoV2Transmit.TcpHeaderOffset = tcpHeaderOffset;
+ lsoInfo.LsoV2Transmit.IPVersion = ipVersion;
+ break;
+ default:
+ break;
+ }
+
+ NET_BUFFER_LIST_INFO(curNbl, TcpLargeSendNetBufferListInfo) = lsoInfo.Value;
}
return STATUS_SUCCESS;
@@ -293,6 +343,48 @@ ret_error:
}
/*
+ *----------------------------------------------------------------------------
+ * OvsCalculateTCPChecksum
+ *     Verifies the TCP checksum of the frame at the start of 'curNb'.
+ *
+ *     If the checksum info attached to 'curNbl' already has
+ *     Receive.TcpChecksumSucceeded set, nothing is recomputed. Otherwise,
+ *     for a frame whose EtherType is IPv4, the TCP checksum field is
+ *     saved, recomputed (the field is first loaded with the result of
+ *     IPPseudoChecksum() and then replaced by the result of
+ *     CalculateChecksumNB() over the L4 bytes) and compared with the saved
+ *     value. Frames with any other EtherType are not checked.
+ *
+ *     NOTE(review): the IPv4 branch does not look at ip->protocol; it
+ *     presumably relies on the caller only passing TCP (STT) frames --
+ *     confirm against the caller.
+ *     NOTE(review): only sizeof(EthHdr) contiguous bytes are requested from
+ *     NdisGetDataBuffer(), yet the IP and TCP headers are read through the
+ *     same pointer; this assumes they live in the same contiguous buffer --
+ *     confirm.
+ *
+ * Returns:
+ *     NDIS_STATUS_SUCCESS if the checksum was already marked as succeeded,
+ *     if the recomputed checksum matches, or if the frame is not IPv4.
+ *     NDIS_STATUS_INVALID_PACKET if the Ethernet header cannot be obtained,
+ *     if the IPv4 header/total lengths are inconsistent, or if the checksum
+ *     does not match.
+ *
+ * Side effects:
+ *     On success, Receive.TcpChecksumSucceeded is set in the NBL's
+ *     TcpIpChecksumNetBufferListInfo. The TCP checksum field in the packet
+ *     is overwritten with the recomputed value.
+ *----------------------------------------------------------------------------
+ */
+static __inline NDIS_STATUS
+OvsCalculateTCPChecksum(PNET_BUFFER_LIST curNbl, PNET_BUFFER curNb)
+{
+    NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo;
+    EthHdr *eth;
+
+    csumInfo.Value = NET_BUFFER_LIST_INFO(curNbl,
+                                          TcpIpChecksumNetBufferListInfo);
+
+    /* Check if TCP Checksum has been calculated by NIC */
+    if (csumInfo.Receive.TcpChecksumSucceeded) {
+        return NDIS_STATUS_SUCCESS;
+    }
+
+    eth = (EthHdr *)NdisGetDataBuffer(curNb, sizeof(EthHdr), NULL, 1, 0);
+    if (eth == NULL) {
+        /* No storage was supplied, so a non-contiguous header yields NULL;
+         * fail the packet instead of dereferencing it. */
+        return NDIS_STATUS_INVALID_PACKET;
+    }
+
+    if (eth->Type == ntohs(NDIS_ETH_TYPE_IPV4)) {
+        IPHdr *ip = (IPHdr *)((PCHAR)eth + sizeof *eth);
+        UINT32 ipHdrLen = ip->ihl * 4;
+        UINT32 ipTotLen = ntohs(ip->tot_len);
+        UINT32 l4Payload;
+        UINT16 checkSum;
+        TCPHdr *tcp;
+
+        /* Reject headers shorter than the fixed IPv4 header and total
+         * lengths smaller than the header, so the subtraction below cannot
+         * wrap around. */
+        if (ipHdrLen < sizeof *ip || ipTotLen < ipHdrLen) {
+            return NDIS_STATUS_INVALID_PACKET;
+        }
+        l4Payload = ipTotLen - ipHdrLen;
+
+        /* Locate the TCP header with the real IP header length (IP options
+         * included) so it agrees with the checksum offset used below. */
+        tcp = (TCPHdr *)((PCHAR)ip + ipHdrLen);
+        checkSum = tcp->check;
+
+        tcp->check = 0;
+        tcp->check = IPPseudoChecksum(&ip->saddr, &ip->daddr,
+                                      IPPROTO_TCP, (UINT16)l4Payload);
+        tcp->check = CalculateChecksumNB(curNb, (UINT16)l4Payload,
+                                         sizeof(EthHdr) + ipHdrLen);
+        if (checkSum != tcp->check) {
+            return NDIS_STATUS_INVALID_PACKET;
+        }
+    }
+
+    /* Record the result so the checksum is not verified again. */
+    csumInfo.Receive.TcpChecksumSucceeded = 1;
+    NET_BUFFER_LIST_INFO(curNbl,
+                         TcpIpChecksumNetBufferListInfo) = csumInfo.Value;
+    return NDIS_STATUS_SUCCESS;
+}
+
+/*
* --------------------------------------------------------------------------
* OvsDecapStt --
* Decapsulates an STT packet.
@@ -311,6 +403,7 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext,
SttHdr *sttHdr;
char *sttBuf[STT_HDR_LEN];
UINT32 advanceCnt, hdrLen;
+ NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo;
curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
ASSERT(NET_BUFFER_NEXT_NB(curNb) == NULL);
@@ -321,6 +414,20 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext,
return NDIS_STATUS_INVALID_LENGTH;
}
+ /* Verify outer TCP Checksum */
+ csumInfo.Value = NET_BUFFER_LIST_INFO(curNbl, TcpIpChecksumNetBufferListInfo);
+
+ /* Check if NIC has indicated TCP checksum failure */
+ if (csumInfo.Receive.TcpChecksumFailed) {
+ return NDIS_STATUS_INVALID_PACKET;
+ }
+
+ /* Calculate the TCP Checksum */
+ status = OvsCalculateTCPChecksum(curNbl, curNb);
+ if (status != NDIS_STATUS_SUCCESS) {
+ return NDIS_STATUS_INVALID_PACKET;
+ }
+
/* Skip Eth header */
hdrLen = sizeof(EthHdr);
NdisAdvanceNetBufferDataStart(curNb, hdrLen, FALSE, NULL);
@@ -348,12 +455,67 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext,
tunKey->tos = ipHdr->tos;
tunKey->ttl = ipHdr->ttl;
tunKey->pad = 0;
-
+
/* Skip stt header, DataOffset points to inner pkt now. */
hdrLen = STT_HDR_LEN;
NdisAdvanceNetBufferDataStart(curNb, hdrLen, FALSE, NULL);
advanceCnt += hdrLen;
+ /* Verify checksum for inner packet if it's required */
+ BOOLEAN innerChecksumVerified = sttHdr->flags & STT_CSUM_VERIFIED;
+
+ if (!innerChecksumVerified) {
+ EthHdr *eth = (EthHdr *)NdisGetDataBuffer(curNb, sizeof(EthHdr),
+ NULL, 1, 0);
+
+ if (eth->Type == ntohs(NDIS_ETH_TYPE_IPV4)) {
+ IPHdr *ip = (IPHdr *)((PCHAR)eth + sizeof *eth);
+ ip->check = 0;
+ ip->check = IPChecksum((UINT8 *)ip, sizeof *ip, 0);
+ UINT16 l4Payload = (UINT16)ntohs(ip->tot_len) - ip->ihl * 4;
+ UINT32 offset = sizeof(EthHdr) + sizeof(IPHdr);
+
+ if (ip->protocol == IPPROTO_TCP) {
+ TCPHdr *tcp = (TCPHdr *)((PCHAR)ip + sizeof *ip);
+ tcp->check = 0;
+ tcp->check = IPPseudoChecksum(&ip->saddr, &ip->daddr,
+ IPPROTO_TCP,
+ (UINT16)l4Payload);
+ tcp->check = CalculateChecksumNB(curNb, l4Payload, offset);
+ } else if (ip->protocol == IPPROTO_UDP) {
+ UDPHdr *udp = (UDPHdr *)((PCHAR)ip + sizeof *ip);
+ udp->check = 0;
+ udp->check = IPPseudoChecksum(&ip->saddr, &ip->daddr,
+ IPPROTO_UDP, l4Payload);
+ udp->check = CalculateChecksumNB(curNb, l4Payload, offset);
+ }
+ }
+ if (eth->Type == ntohs(NDIS_ETH_TYPE_IPV6)) {
+ IPv6Hdr *ip = (IPv6Hdr *)((PCHAR)eth + sizeof *eth);
+ UINT32 offset = (UINT32)(sizeof *eth + sizeof *ip);
+ UINT16 totalLength = (UINT16)ntohs(ip->payload_len);
+
+ if (ip->nexthdr == IPPROTO_TCP) {
+ TCPHdr *tcp = (TCPHdr *)((PCHAR)ip + sizeof *ip);
+ tcp->check = 0;
+ tcp->check = IPv6PseudoChecksum((UINT32 *)&ip->saddr,
+ (UINT32 *)&ip->daddr,
+ IPPROTO_TCP, totalLength);
+ tcp->check = CalculateChecksumNB(curNb, totalLength, offset);
+ }
+ else if (ip->nexthdr == IPPROTO_UDP) {
+ TCPHdr *tcp = (TCPHdr *)((PCHAR)ip + sizeof *ip);
+ tcp->check = 0;
+ tcp->check = IPv6PseudoChecksum((UINT32 *)&ip->saddr,
+ (UINT32 *)&ip->daddr,
+ IPPROTO_UDP, totalLength);
+ tcp->check = CalculateChecksumNB(curNb, totalLength, offset);
+ }
+ }
+
+ NET_BUFFER_LIST_INFO(curNbl, TcpIpChecksumNetBufferListInfo) = 0;
+ }
+
*newNbl = OvsPartialCopyNBL(switchContext, curNbl, OVS_DEFAULT_COPY_SIZE,
0, FALSE /*copy NBL info*/);
@@ -366,4 +528,4 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext,
}
return status;
-}
+}
\ No newline at end of file
Enable support for Checksum offloads in STT if it's enabled in the Windows VM. Signed-off-by: Sairam Venugopal <vsairam@vmware.com> --- datapath-windows/ovsext/Stt.c | 190 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 176 insertions(+), 14 deletions(-)