Message ID | 1442261680-5464-1-git-send-email-vsairam@vmware.com |
---|---|
State | Changes Requested |
Headers | show |
Hi Sairam, Thank you for the patch. Maybe I am missing something conceptually, but I do not understand why you are computing computing the checksums on receive (decapsulation) for the inner packet. IMO it should be the other way around: you should compute the checksums for the inner packet in the case they are enabled in the VM when sending it, set always the flags to STT_CSUM_VERIFIED unless LSO is enabled, and rely on the networking stack of the VM to drop the packet if the checksum was incorrect. I looked over the RFC(https://tools.ietf.org/html/draft-davie-stt-06) and could not see a clear reference to it. Small nit: $ git apply ovs-dev-v2-datapath-windows-Enable-checksum-offloads-in-STT.patch ovs-dev-v2-datapath-windows-Enable-checksum-offloads-in-STT.patch:13: trailing whitespace. csumInfo.Value = NET_BUFFER_LIST_INFO(curNbl, ovs-dev-v2-datapath-windows-Enable-checksum-offloads-in-STT.patch:75: trailing whitespace. ovs-dev-v2-datapath-windows-Enable-checksum-offloads-in-STT.patch:123: trailing whitespace. ovs-dev-v2-datapath-windows-Enable-checksum-offloads-in-STT.patch:137: trailing whitespace. ovs-dev-v2-datapath-windows-Enable-checksum-offloads-in-STT.patch:152: trailing whitespace. if (ip->protocol == IPPROTO_TCP) { warning: 5 lines add whitespace errors. More comments inlined. Thanks, Alin. > -----Mesaj original----- > De la: dev [mailto:dev-bounces@openvswitch.org] În numele Sairam > Venugopal > Trimis: Monday, September 14, 2015 11:15 PM > Către: dev@openvswitch.org > Subiect: [ovs-dev] [PATCH v2] datapath-windows: Enable checksum offloads > in STT > > Enable support for Checksum offloads in STT if it's enabled in the Windows > VM. > > Signed-off-by: Sairam Venugopal <vsairam@vmware.com> > --- > datapath-windows/ovsext/Stt.c | 141 > +++++++++++++++++++++++++++++++++++++++--- > 1 file changed, 134 insertions(+), 7 deletions(-) > > diff --git a/datapath-windows/ovsext/Stt.c b/datapath- > windows/ovsext/Stt.c index b6272c3..f9d0afb 100644 > --- a/datapath-windows/ovsext/Stt.c > +++ b/datapath-windows/ovsext/Stt.c > @@ -150,6 +150,17 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport, > UNREFERENCED_PARAMETER(layers); > > curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); > + > + /* Verify if inner checksum is verified */ > + BOOLEAN innerChecksumVerified = FALSE; > + NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo; > + csumInfo.Value = NET_BUFFER_LIST_INFO(curNbl, > + > + TcpIpChecksumNetBufferListInfo); > + > + innerChecksumVerified = csumInfo.Transmit.IpHeaderChecksum == 0 && > + csumInfo.Transmit.TcpChecksum == 0 && > + csumInfo.Transmit.UdpChecksum == 0; > + > if (layers->isTcp) { > NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO lsoInfo; > > @@ -266,6 +277,10 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport, > > /* XXX need to peek into the inner packet, hard code for now */ > sttHdr->flags = STT_PROTO_IPV4; > + /* XXX need to handle Checksum partial flag */ > + if (innerChecksumVerified) { > + sttHdr->flags |= STT_CSUM_VERIFIED; > + } > sttHdr->l4Offset = 0; > > sttHdr->reserved = 0; > @@ -276,13 +291,13 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport, > /* Zero out stt padding */ > *(uint16 *)(sttHdr + 1) = 0; > > - /* Calculate software tcp checksum */ > - outerTcpHdr->check = CalculateChecksumNB(curNb, (uint16) > tcpChksumLen, > - sizeof(EthHdr) + sizeof(IPHdr)); > - if (outerTcpHdr->check == 0) { > - status = NDIS_STATUS_FAILURE; > - goto ret_error; > - } > + /* Offload IP and TCP checksum */ > + csumInfo.Value = 0; > + csumInfo.Transmit.IpHeaderChecksum = 1; > + csumInfo.Transmit.TcpChecksum = 1; > + csumInfo.Transmit.IsIPv4 = 1; > + csumInfo.Transmit.TcpHeaderOffset = sizeof *outerEthHdr + sizeof > *outerIpHdr; > + NET_BUFFER_LIST_INFO(curNbl, TcpIpChecksumNetBufferListInfo) = > + csumInfo.Value; [Alin Gabriel Serdean: ] Since you are trying to force the adapter to calculate IP/TCP checksums for you. Maybe it would be an idea to not compute IP checksum and tcp pseudo header checksum which is done above. https://github.com/openvswitch/ovs/blob/master/datapath-windows/ovsext/Stt.c#L246 https://github.com/openvswitch/ovs/blob/master/datapath-windows/ovsext/Stt.c#L263 I will do some tests to see what happens in the case checksums are disabled/not supported on the host adapter and let you know if the test went ok. I think you should compute ip/tcp checksums in the case they are disabled and not rely on the NDIS stack for it. > > return STATUS_SUCCESS; > > @@ -293,6 +308,48 @@ ret_error: > } > > /* > + > +*---------------------------------------------------------------------- > +------ > + * OvsCalculateTCPChecksum > + * Calculate TCP checksum > + > +*---------------------------------------------------------------------- > +------ > + */ > +static __inline NDIS_STATUS > +OvsCalculateTCPChecksum(PNET_BUFFER_LIST curNbl, PNET_BUFFER > curNb) { > + NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo; > + csumInfo.Value = NET_BUFFER_LIST_INFO(curNbl, > TcpIpChecksumNetBufferListInfo); > + UINT16 checkSum; > + > + /* Check if TCP Checksum has been calculated by NIC */ > + if (csumInfo.Receive.TcpChecksumSucceeded) { > + return NDIS_STATUS_SUCCESS; > + } > + > + EthHdr *eth = (EthHdr *)NdisGetDataBuffer(curNb, sizeof(EthHdr), > + NULL, 1, 0); > + > + if (eth->Type == ntohs(NDIS_ETH_TYPE_IPV4)) { > + IPHdr *ip = (IPHdr *)((PCHAR)eth + sizeof *eth); > + UINT32 l4Payload = ntohs(ip->tot_len) - ip->ihl * 4; > + TCPHdr *tcp = (TCPHdr *)((PCHAR)ip + sizeof *ip); > + checkSum = tcp->check; > + > + tcp->check = 0; > + tcp->check = IPPseudoChecksum(&ip->saddr, &ip->daddr, > + IPPROTO_TCP, (UINT16)l4Payload); > + tcp->check = CalculateChecksumNB(curNb, (UINT16)(l4Payload), > + sizeof(EthHdr) + ip->ihl * 4); > + if (checkSum != tcp->check) { > + return NDIS_STATUS_INVALID_PACKET; > + } > + } > + > + csumInfo.Receive.TcpChecksumSucceeded = 1; > + NET_BUFFER_LIST_INFO(curNbl, TcpIpChecksumNetBufferListInfo) = > csumInfo.Value; > + return NDIS_STATUS_SUCCESS; > +} > + > +/* > * -------------------------------------------------------------------------- > * OvsDecapStt -- > * Decapsulates an STT packet. > @@ -311,6 +368,7 @@ OvsDecapStt(POVS_SWITCH_CONTEXT > switchContext, > SttHdr *sttHdr; > char *sttBuf[STT_HDR_LEN]; > UINT32 advanceCnt, hdrLen; > + NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo; > > curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); > ASSERT(NET_BUFFER_NEXT_NB(curNb) == NULL); @@ -321,6 +379,20 @@ > OvsDecapStt(POVS_SWITCH_CONTEXT switchContext, > return NDIS_STATUS_INVALID_LENGTH; > } > > + /* Verify outer TCP Checksum */ > + csumInfo.Value = NET_BUFFER_LIST_INFO(curNbl, > + TcpIpChecksumNetBufferListInfo); > + > + /* Check if NIC has indicated TCP checksum failure */ > + if (csumInfo.Receive.TcpChecksumFailed) { > + return NDIS_STATUS_INVALID_PACKET; > + } > + > + /* Calculate the TCP Checksum */ > + status = OvsCalculateTCPChecksum(curNbl, curNb); > + if (status != NDIS_STATUS_SUCCESS) { > + return NDIS_STATUS_INVALID_PACKET; > + } > + > /* Skip Eth header */ > hdrLen = sizeof(EthHdr); > NdisAdvanceNetBufferDataStart(curNb, hdrLen, FALSE, NULL); @@ -353,6 > +425,61 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext, > hdrLen = STT_HDR_LEN; > NdisAdvanceNetBufferDataStart(curNb, hdrLen, FALSE, NULL); > advanceCnt += hdrLen; > + > + /* Verify checksum for inner packet if it's required */ > + BOOLEAN innerChecksumVerified = sttHdr->flags & STT_CSUM_VERIFIED; > + > + if (!innerChecksumVerified) { > + EthHdr *eth = (EthHdr *)NdisGetDataBuffer(curNb, sizeof(EthHdr), > + NULL, 1, 0); > + > + if (eth->Type == ntohs(NDIS_ETH_TYPE_IPV4)) { > + IPHdr *ip = (IPHdr *)((PCHAR)eth + sizeof *eth); > + ip->check = 0; > + ip->check = IPChecksum((UINT8 *)ip, sizeof *ip, 0); > + UINT16 l4Payload = (UINT16)ntohs(ip->tot_len) - ip->ihl * 4; > + UINT32 offset = sizeof(EthHdr) + sizeof(IPHdr); [Alin Gabriel Serdean: ] UINT32 offset = sizeof(EthHdr) + ip->ihl * 4; > + > + if (ip->protocol == IPPROTO_TCP) { > + TCPHdr *tcp = (TCPHdr *)((PCHAR)ip + sizeof *ip); > + tcp->check = 0; [Alin Gabriel Serdean: ] setting it to 0 is unnecessary > + tcp->check = IPPseudoChecksum(&ip->saddr, &ip->daddr, > + IPPROTO_TCP, > + (UINT16)l4Payload); > + tcp->check = CalculateChecksumNB(curNb, l4Payload, offset); > + } else if (ip->protocol == IPPROTO_UDP) { > + UDPHdr *udp = (UDPHdr *)((PCHAR)ip + sizeof *ip); > + udp->check = 0; [Alin Gabriel Serdean: ] setting it to 0 is unnecessary > + udp->check = IPPseudoChecksum(&ip->saddr, &ip->daddr, > + IPPROTO_UDP, l4Payload); > + udp->check = CalculateChecksumNB(curNb, l4Payload, offset); > + } > + } > + if (eth->Type == ntohs(NDIS_ETH_TYPE_IPV6)) { > + IPv6Hdr *ip = (IPv6Hdr *)((PCHAR)eth + sizeof *eth); > + UINT32 offset = (UINT32)(sizeof *eth + sizeof *ip); > + UINT16 totalLength = (UINT16)ntohs(ip->payload_len); > + > + if (ip->nexthdr == IPPROTO_TCP) { > + TCPHdr *tcp = (TCPHdr *)((PCHAR)ip + sizeof *ip); > + tcp->check = 0; [Alin Gabriel Serdean: ] setting it to 0 is unnecessary > + tcp->check = IPv6PseudoChecksum((UINT32 *)&ip->saddr, > + (UINT32 *)&ip->daddr, > + IPPROTO_TCP, totalLength); > + tcp->check = CalculateChecksumNB(curNb, totalLength, offset); > + } > + else if (ip->nexthdr == IPPROTO_UDP) { > + TCPHdr *tcp = (TCPHdr *)((PCHAR)ip + sizeof *ip); [Alin Gabriel Serdean: ] UDPHdr *udp > + tcp->check = 0; [Alin Gabriel Serdean: ] setting it to 0 is unnecessary > + tcp->check = IPv6PseudoChecksum((UINT32 *)&ip->saddr, > + (UINT32 *)&ip->daddr, > + IPPROTO_UDP, totalLength); > + tcp->check = CalculateChecksumNB(curNb, totalLength, offset); > + } > + } > + > + NET_BUFFER_LIST_INFO(curNbl, TcpIpChecksumNetBufferListInfo) = 0; > + } > > *newNbl = OvsPartialCopyNBL(switchContext, curNbl, > OVS_DEFAULT_COPY_SIZE, > 0, FALSE /*copy NBL info*/); > -- > 2.5.0.windows.1 > > _______________________________________________ > dev mailing list > dev@openvswitch.org > http://openvswitch.org/mailman/listinfo/dev
diff --git a/datapath-windows/ovsext/Stt.c b/datapath-windows/ovsext/Stt.c index b6272c3..f9d0afb 100644 --- a/datapath-windows/ovsext/Stt.c +++ b/datapath-windows/ovsext/Stt.c @@ -150,6 +150,17 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport, UNREFERENCED_PARAMETER(layers); curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); + + /* Verify if inner checksum is verified */ + BOOLEAN innerChecksumVerified = FALSE; + NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo; + csumInfo.Value = NET_BUFFER_LIST_INFO(curNbl, + TcpIpChecksumNetBufferListInfo); + + innerChecksumVerified = csumInfo.Transmit.IpHeaderChecksum == 0 && + csumInfo.Transmit.TcpChecksum == 0 && + csumInfo.Transmit.UdpChecksum == 0; + if (layers->isTcp) { NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO lsoInfo; @@ -266,6 +277,10 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport, /* XXX need to peek into the inner packet, hard code for now */ sttHdr->flags = STT_PROTO_IPV4; + /* XXX need to handle Checksum partial flag */ + if (innerChecksumVerified) { + sttHdr->flags |= STT_CSUM_VERIFIED; + } sttHdr->l4Offset = 0; sttHdr->reserved = 0; @@ -276,13 +291,13 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport, /* Zero out stt padding */ *(uint16 *)(sttHdr + 1) = 0; - /* Calculate software tcp checksum */ - outerTcpHdr->check = CalculateChecksumNB(curNb, (uint16) tcpChksumLen, - sizeof(EthHdr) + sizeof(IPHdr)); - if (outerTcpHdr->check == 0) { - status = NDIS_STATUS_FAILURE; - goto ret_error; - } + /* Offload IP and TCP checksum */ + csumInfo.Value = 0; + csumInfo.Transmit.IpHeaderChecksum = 1; + csumInfo.Transmit.TcpChecksum = 1; + csumInfo.Transmit.IsIPv4 = 1; + csumInfo.Transmit.TcpHeaderOffset = sizeof *outerEthHdr + sizeof *outerIpHdr; + NET_BUFFER_LIST_INFO(curNbl, TcpIpChecksumNetBufferListInfo) = csumInfo.Value; return STATUS_SUCCESS; @@ -293,6 +308,48 @@ ret_error: } /* + *---------------------------------------------------------------------------- + * OvsCalculateTCPChecksum + * Calculate TCP checksum + *---------------------------------------------------------------------------- + */ +static __inline NDIS_STATUS +OvsCalculateTCPChecksum(PNET_BUFFER_LIST curNbl, PNET_BUFFER curNb) +{ + NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo; + csumInfo.Value = NET_BUFFER_LIST_INFO(curNbl, TcpIpChecksumNetBufferListInfo); + UINT16 checkSum; + + /* Check if TCP Checksum has been calculated by NIC */ + if (csumInfo.Receive.TcpChecksumSucceeded) { + return NDIS_STATUS_SUCCESS; + } + + EthHdr *eth = (EthHdr *)NdisGetDataBuffer(curNb, sizeof(EthHdr), + NULL, 1, 0); + + if (eth->Type == ntohs(NDIS_ETH_TYPE_IPV4)) { + IPHdr *ip = (IPHdr *)((PCHAR)eth + sizeof *eth); + UINT32 l4Payload = ntohs(ip->tot_len) - ip->ihl * 4; + TCPHdr *tcp = (TCPHdr *)((PCHAR)ip + sizeof *ip); + checkSum = tcp->check; + + tcp->check = 0; + tcp->check = IPPseudoChecksum(&ip->saddr, &ip->daddr, + IPPROTO_TCP, (UINT16)l4Payload); + tcp->check = CalculateChecksumNB(curNb, (UINT16)(l4Payload), + sizeof(EthHdr) + ip->ihl * 4); + if (checkSum != tcp->check) { + return NDIS_STATUS_INVALID_PACKET; + } + } + + csumInfo.Receive.TcpChecksumSucceeded = 1; + NET_BUFFER_LIST_INFO(curNbl, TcpIpChecksumNetBufferListInfo) = csumInfo.Value; + return NDIS_STATUS_SUCCESS; +} + +/* * -------------------------------------------------------------------------- * OvsDecapStt -- * Decapsulates an STT packet. @@ -311,6 +368,7 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext, SttHdr *sttHdr; char *sttBuf[STT_HDR_LEN]; UINT32 advanceCnt, hdrLen; + NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo; curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); ASSERT(NET_BUFFER_NEXT_NB(curNb) == NULL); @@ -321,6 +379,20 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext, return NDIS_STATUS_INVALID_LENGTH; } + /* Verify outer TCP Checksum */ + csumInfo.Value = NET_BUFFER_LIST_INFO(curNbl, TcpIpChecksumNetBufferListInfo); + + /* Check if NIC has indicated TCP checksum failure */ + if (csumInfo.Receive.TcpChecksumFailed) { + return NDIS_STATUS_INVALID_PACKET; + } + + /* Calculate the TCP Checksum */ + status = OvsCalculateTCPChecksum(curNbl, curNb); + if (status != NDIS_STATUS_SUCCESS) { + return NDIS_STATUS_INVALID_PACKET; + } + /* Skip Eth header */ hdrLen = sizeof(EthHdr); NdisAdvanceNetBufferDataStart(curNb, hdrLen, FALSE, NULL); @@ -353,6 +425,61 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext, hdrLen = STT_HDR_LEN; NdisAdvanceNetBufferDataStart(curNb, hdrLen, FALSE, NULL); advanceCnt += hdrLen; + + /* Verify checksum for inner packet if it's required */ + BOOLEAN innerChecksumVerified = sttHdr->flags & STT_CSUM_VERIFIED; + + if (!innerChecksumVerified) { + EthHdr *eth = (EthHdr *)NdisGetDataBuffer(curNb, sizeof(EthHdr), + NULL, 1, 0); + + if (eth->Type == ntohs(NDIS_ETH_TYPE_IPV4)) { + IPHdr *ip = (IPHdr *)((PCHAR)eth + sizeof *eth); + ip->check = 0; + ip->check = IPChecksum((UINT8 *)ip, sizeof *ip, 0); + UINT16 l4Payload = (UINT16)ntohs(ip->tot_len) - ip->ihl * 4; + UINT32 offset = sizeof(EthHdr) + sizeof(IPHdr); + + if (ip->protocol == IPPROTO_TCP) { + TCPHdr *tcp = (TCPHdr *)((PCHAR)ip + sizeof *ip); + tcp->check = 0; + tcp->check = IPPseudoChecksum(&ip->saddr, &ip->daddr, + IPPROTO_TCP, + (UINT16)l4Payload); + tcp->check = CalculateChecksumNB(curNb, l4Payload, offset); + } else if (ip->protocol == IPPROTO_UDP) { + UDPHdr *udp = (UDPHdr *)((PCHAR)ip + sizeof *ip); + udp->check = 0; + udp->check = IPPseudoChecksum(&ip->saddr, &ip->daddr, + IPPROTO_UDP, l4Payload); + udp->check = CalculateChecksumNB(curNb, l4Payload, offset); + } + } + if (eth->Type == ntohs(NDIS_ETH_TYPE_IPV6)) { + IPv6Hdr *ip = (IPv6Hdr *)((PCHAR)eth + sizeof *eth); + UINT32 offset = (UINT32)(sizeof *eth + sizeof *ip); + UINT16 totalLength = (UINT16)ntohs(ip->payload_len); + + if (ip->nexthdr == IPPROTO_TCP) { + TCPHdr *tcp = (TCPHdr *)((PCHAR)ip + sizeof *ip); + tcp->check = 0; + tcp->check = IPv6PseudoChecksum((UINT32 *)&ip->saddr, + (UINT32 *)&ip->daddr, + IPPROTO_TCP, totalLength); + tcp->check = CalculateChecksumNB(curNb, totalLength, offset); + } + else if (ip->nexthdr == IPPROTO_UDP) { + TCPHdr *tcp = (TCPHdr *)((PCHAR)ip + sizeof *ip); + tcp->check = 0; + tcp->check = IPv6PseudoChecksum((UINT32 *)&ip->saddr, + (UINT32 *)&ip->daddr, + IPPROTO_UDP, totalLength); + tcp->check = CalculateChecksumNB(curNb, totalLength, offset); + } + } + + NET_BUFFER_LIST_INFO(curNbl, TcpIpChecksumNetBufferListInfo) = 0; + } *newNbl = OvsPartialCopyNBL(switchContext, curNbl, OVS_DEFAULT_COPY_SIZE, 0, FALSE /*copy NBL info*/);
Enable support for Checksum offloads in STT if it's enabled in the Windows VM. Signed-off-by: Sairam Venugopal <vsairam@vmware.com> --- datapath-windows/ovsext/Stt.c | 141 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 134 insertions(+), 7 deletions(-)