Message ID | 1465231499-40244-1-git-send-email-pboca@cloudbasesolutions.com |
---|---|
State | Accepted |
Headers | show |
Thanks for making the changes. Acked-by: Sairam Venugopal <vsairam@vmware.com> On 6/6/16, 9:45 AM, "Paul Boca" <pboca@cloudbasesolutions.com> wrote: >*Added OvsExtractLayers - populates only the layers field without >unnecessary >memory operations for flow part >*If in STT header the flags are 0 then force packets checksums calculation >on receive. >*Ensure correct pseudo checksum is set for LSO both on send and receive. >Linux includes the segment length to TCP pseudo-checksum conforming to >RFC 793 but in case of LSO Windows expects this to be only on >Source IP Address, Destination IP Address, and Protocol. >*Fragment expiration on rx side of STT was set to 30 seconds, but the >correct >timeout would be TTL of the packet > >Signed-off-by: Paul-Daniel Boca <pboca@cloudbasesolutions.com> >--- >V2: Use STT_ENTRY_TIMEOUT on STT reassemble. > Small refactoring and added LSO comment with specific requirements. >--- > datapath-windows/ovsext/Flow.c | 243 >++++++++++++++++++++++++++++----- > datapath-windows/ovsext/Flow.h | 2 + > datapath-windows/ovsext/PacketParser.c | 97 +++++++------ > datapath-windows/ovsext/PacketParser.h | 8 +- > datapath-windows/ovsext/Stt.c | 124 +++++++++++++---- > datapath-windows/ovsext/User.c | 17 ++- > 6 files changed, 377 insertions(+), 114 deletions(-) > >diff --git a/datapath-windows/ovsext/Flow.c >b/datapath-windows/ovsext/Flow.c >index c2e0227..2a91855 100644 >--- a/datapath-windows/ovsext/Flow.c >+++ b/datapath-windows/ovsext/Flow.c >@@ -1570,7 +1570,8 @@ _MapKeyAttrToFlowPut(PNL_ATTR *keyAttrs, > > ndKey = NlAttrGet(keyAttrs[OVS_KEY_ATTR_ND]); > RtlCopyMemory(&icmp6FlowPutKey->ndTarget, >- ndKey->nd_target, sizeof >(icmp6FlowPutKey->ndTarget)); >+ ndKey->nd_target, >+ sizeof (icmp6FlowPutKey->ndTarget)); > RtlCopyMemory(icmp6FlowPutKey->arpSha, > ndKey->nd_sll, ETH_ADDR_LEN); > RtlCopyMemory(icmp6FlowPutKey->arpTha, >@@ -1600,8 +1601,10 @@ _MapKeyAttrToFlowPut(PNL_ATTR *keyAttrs, > arpFlowPutKey->nwSrc = arpKey->arp_sip; > 
arpFlowPutKey->nwDst = arpKey->arp_tip; > >- RtlCopyMemory(arpFlowPutKey->arpSha, arpKey->arp_sha, >ETH_ADDR_LEN); >- RtlCopyMemory(arpFlowPutKey->arpTha, arpKey->arp_tha, >ETH_ADDR_LEN); >+ RtlCopyMemory(arpFlowPutKey->arpSha, arpKey->arp_sha, >+ ETH_ADDR_LEN); >+ RtlCopyMemory(arpFlowPutKey->arpTha, arpKey->arp_tha, >+ ETH_ADDR_LEN); > /* Kernel datapath assumes 'arpFlowPutKey->nwProto' to be in >host > * order. */ > arpFlowPutKey->nwProto = (UINT8)ntohs((arpKey->arp_op)); >@@ -1850,29 +1853,195 @@ OvsGetFlowMetadata(OvsFlowKey *key, > return status; > } > >+ > /* >- >*------------------------------------------------------------------------- >--- >- * Initializes 'flow' members from 'packet', 'skb_priority', 'tun_id', >and >- * 'ofp_in_port'. >- * >- * Initializes 'packet' header pointers as follows: >- * >- * - packet->l2 to the start of the Ethernet header. >- * >- * - packet->l3 to just past the Ethernet header, or just past the >- * vlan_header if one is present, to the first byte of the payload >of the >- * Ethernet frame. >- * >- * - packet->l4 to just past the IPv4 header, if one is present and >has a >- * correct length, and otherwise NULL. >- * >- * - packet->l7 to just past the TCP, UDP, SCTP or ICMP header, if >one is >- * present and has a correct length, and otherwise NULL. >- * >- * Returns NDIS_STATUS_SUCCESS normally. Fails only if packet data >cannot be accessed >- * (e.g. if Pkt_CopyBytesOut() returns an error). >- >*------------------------------------------------------------------------- >--- >- */ >+*------------------------------------------------------------------------ >---- >+* Initializes 'layers' members from 'packet' >+* >+* Initializes 'layers' header pointers as follows: >+* >+* - layers->l2 to the start of the Ethernet header. >+* >+* - layers->l3 to just past the Ethernet header, or just past the >+* vlan_header if one is present, to the first byte of the payload >of the >+* Ethernet frame. 
>+* >+* - layers->l4 to just past the IPv4 header, if one is present and >has a >+* correct length, and otherwise NULL. >+* >+* - layers->l7 to just past the TCP, UDP, SCTP or ICMP header, if one >is >+* present and has a correct length, and otherwise NULL. >+* >+* - layers->isIPv4/isIPv6/isTcp/isUdp/isSctp based on the packet type >+* >+* Returns NDIS_STATUS_SUCCESS normally. >+* Fails only if packet data cannot be accessed. >+* (e.g. if OvsParseIPv6() returns an error). >+*------------------------------------------------------------------------ >---- >+*/ >+NDIS_STATUS >+OvsExtractLayers(const NET_BUFFER_LIST *packet, >+ POVS_PACKET_HDR_INFO layers) >+{ >+ struct Eth_Header *eth; >+ UINT8 offset = 0; >+ PVOID vlanTagValue; >+ ovs_be16 dlType; >+ >+ layers->value = 0; >+ >+ /* Link layer. */ >+ eth = (Eth_Header *)GetStartAddrNBL((NET_BUFFER_LIST *)packet); >+ >+ /* >+ * vlan_tci. >+ */ >+ vlanTagValue = NET_BUFFER_LIST_INFO(packet, >Ieee8021QNetBufferListInfo); >+ if (!vlanTagValue) { >+ if (eth->dix.typeNBO == ETH_TYPE_802_1PQ_NBO) { >+ offset = sizeof(Eth_802_1pq_Tag); >+ } >+ >+ /* >+ * XXX Please note after this point, src mac and dst mac should >+ * not be accessed through eth >+ */ >+ eth = (Eth_Header *)((UINT8 *)eth + offset); >+ } >+ >+ /* >+ * dl_type. >+ * >+ * XXX assume that at least the first >+ * 12 bytes of received packets are mapped. This code has the >stronger >+ * assumption that at least the first 22 bytes of 'packet' is mapped >(if my >+ * arithmetic is right). 
>+ */ >+ if (ETH_TYPENOT8023(eth->dix.typeNBO)) { >+ dlType = eth->dix.typeNBO; >+ layers->l3Offset = ETH_HEADER_LEN_DIX + offset; >+ } else if (OvsPacketLenNBL(packet) >= ETH_HEADER_LEN_802_3 && >+ eth->e802_3.llc.dsap == 0xaa && >+ eth->e802_3.llc.ssap == 0xaa && >+ eth->e802_3.llc.control == ETH_LLC_CONTROL_UFRAME && >+ eth->e802_3.snap.snapOrg[0] == 0x00 && >+ eth->e802_3.snap.snapOrg[1] == 0x00 && >+ eth->e802_3.snap.snapOrg[2] == 0x00) { >+ dlType = eth->e802_3.snap.snapType.typeNBO; >+ layers->l3Offset = ETH_HEADER_LEN_802_3 + offset; >+ } else { >+ dlType = htons(OVSWIN_DL_TYPE_NONE); >+ layers->l3Offset = ETH_HEADER_LEN_DIX + offset; >+ } >+ >+ /* Network layer. */ >+ if (dlType == htons(ETH_TYPE_IPV4)) { >+ struct IPHdr ip_storage; >+ const struct IPHdr *nh; >+ >+ layers->isIPv4 = 1; >+ nh = OvsGetIp(packet, layers->l3Offset, &ip_storage); >+ if (nh) { >+ layers->l4Offset = layers->l3Offset + nh->ihl * 4; >+ >+ if (!(nh->frag_off & htons(IP_OFFSET))) { >+ if (nh->protocol == SOCKET_IPPROTO_TCP) { >+ OvsParseTcp(packet, NULL, layers); >+ } else if (nh->protocol == SOCKET_IPPROTO_UDP) { >+ OvsParseUdp(packet, NULL, layers); >+ } else if (nh->protocol == SOCKET_IPPROTO_SCTP) { >+ OvsParseSctp(packet, NULL, layers); >+ } else if (nh->protocol == SOCKET_IPPROTO_ICMP) { >+ ICMPHdr icmpStorage; >+ const ICMPHdr *icmp; >+ >+ icmp = OvsGetIcmp(packet, layers->l4Offset, >&icmpStorage); >+ if (icmp) { >+ layers->l7Offset = layers->l4Offset + sizeof >*icmp; >+ } >+ } >+ } >+ } >+ } else if (dlType == htons(ETH_TYPE_IPV6)) { >+ NDIS_STATUS status; >+ Ipv6Key ipv6Key; >+ >+ status = OvsParseIPv6(packet, &ipv6Key, layers); >+ if (status != NDIS_STATUS_SUCCESS) { >+ return status; >+ } >+ layers->isIPv6 = 1; >+ >+ if (ipv6Key.nwProto == SOCKET_IPPROTO_TCP) { >+ OvsParseTcp(packet, &(ipv6Key.l4), layers); >+ } else if (ipv6Key.nwProto == SOCKET_IPPROTO_UDP) { >+ OvsParseUdp(packet, &(ipv6Key.l4), layers); >+ } else if (ipv6Key.nwProto == SOCKET_IPPROTO_SCTP) { >+ 
OvsParseSctp(packet, &ipv6Key.l4, layers); >+ } else if (ipv6Key.nwProto == SOCKET_IPPROTO_ICMPV6) { >+ Icmp6Key icmp6Key; >+ OvsParseIcmpV6(packet, NULL, &icmp6Key, layers); >+ } >+ } else if (OvsEthertypeIsMpls(dlType)) { >+ MPLSHdr mplsStorage; >+ const MPLSHdr *mpls; >+ >+ /* >+ * In the presence of an MPLS label stack the end of the L2 >+ * header and the beginning of the L3 header differ. >+ * >+ * A network packet may contain multiple MPLS labels, but we >+ * are only interested in the topmost label stack entry. >+ * >+ * Advance network header to the beginning of the L3 header. >+ * layers->l3Offset corresponds to the end of the L2 header. >+ */ >+ for (UINT32 i = 0; i < FLOW_MAX_MPLS_LABELS; i++) { >+ mpls = OvsGetMpls(packet, layers->l3Offset, &mplsStorage); >+ if (!mpls) { >+ break; >+ } >+ >+ layers->l3Offset += MPLS_HLEN; >+ layers->l4Offset += MPLS_HLEN; >+ >+ if (mpls->lse & htonl(MPLS_BOS_MASK)) { >+ /* >+ * Bottom of Stack bit is set, which means there are no >+ * remaining MPLS labels in the packet. >+ */ >+ break; >+ } >+ } >+ } >+ >+ return NDIS_STATUS_SUCCESS; >+} >+ >+/* >+*------------------------------------------------------------------------ >---- >+* Initializes 'flow' members from 'packet', 'skb_priority', 'tun_id', and >+* 'ofp_in_port'. >+* >+* Initializes 'packet' header pointers as follows: >+* >+* - packet->l2 to the start of the Ethernet header. >+* >+* - packet->l3 to just past the Ethernet header, or just past the >+* vlan_header if one is present, to the first byte of the payload >of the >+* Ethernet frame. >+* >+* - packet->l4 to just past the IPv4 header, if one is present and >has a >+* correct length, and otherwise NULL. >+* >+* - packet->l7 to just past the TCP, UDP, SCTP or ICMP header, if one >is >+* present and has a correct length, and otherwise NULL. >+* >+* Returns NDIS_STATUS_SUCCESS normally. >+* Fails only if packet data cannot be accessed. >+* (e.g. if Pkt_CopyBytesOut() returns an error). 
>+*------------------------------------------------------------------------ >---- >+*/ > NDIS_STATUS > OvsExtractFlow(const NET_BUFFER_LIST *packet, > UINT32 inPort, >@@ -1904,8 +2073,8 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet, > > /* Link layer. */ > eth = (Eth_Header *)GetStartAddrNBL((NET_BUFFER_LIST *)packet); >- memcpy(flow->l2.dlSrc, eth->src, ETH_ADDR_LENGTH); >- memcpy(flow->l2.dlDst, eth->dst, ETH_ADDR_LENGTH); >+ RtlCopyMemory(flow->l2.dlSrc, eth->src, ETH_ADDR_LENGTH); >+ RtlCopyMemory(flow->l2.dlDst, eth->dst, ETH_ADDR_LENGTH); > > /* > * vlan_tci. >@@ -1927,8 +2096,7 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet, > flow->l2.vlanTci = 0; > } > /* >- * XXX >- * Please note after this point, src mac and dst mac should >+ * XXX Please note after this point, src mac and dst mac should > * not be accessed through eth > */ > eth = (Eth_Header *)((UINT8 *)eth + offset); >@@ -1959,7 +2127,8 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet, > layers->l3Offset = ETH_HEADER_LEN_DIX + offset; > } > >- flow->l2.keyLen = OVS_WIN_TUNNEL_KEY_SIZE + OVS_L2_KEY_SIZE - >flow->l2.offset; >+ flow->l2.keyLen = OVS_WIN_TUNNEL_KEY_SIZE + OVS_L2_KEY_SIZE >+ - flow->l2.offset; > /* Network layer. 
*/ > if (flow->l2.dlType == htons(ETH_TYPE_IPV4)) { > struct IPHdr ip_storage; >@@ -2016,9 +2185,9 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet, > } else if (flow->l2.dlType == htons(ETH_TYPE_IPV6)) { > NDIS_STATUS status; > flow->l2.keyLen += OVS_IPV6_KEY_SIZE; >- status = OvsParseIPv6(packet, flow, layers); >+ status = OvsParseIPv6(packet, &flow->ipv6Key, layers); > if (status != NDIS_STATUS_SUCCESS) { >- memset(&flow->ipv6Key, 0, sizeof (Ipv6Key)); >+ RtlZeroMemory(&flow->ipv6Key, sizeof (Ipv6Key)); > return status; > } > layers->isIPv6 = 1; >@@ -2033,7 +2202,7 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet, > } else if (flow->ipv6Key.nwProto == SOCKET_IPPROTO_SCTP) { > OvsParseSctp(packet, &flow->ipv6Key.l4, layers); > } else if (flow->ipv6Key.nwProto == SOCKET_IPPROTO_ICMPV6) { >- OvsParseIcmpV6(packet, flow, layers); >+ OvsParseIcmpV6(packet, &flow->ipv6Key, &flow->icmp6Key, >layers); > flow->l2.keyLen += (OVS_ICMPV6_KEY_SIZE - OVS_IPV6_KEY_SIZE); > } > } else if (flow->l2.dlType == htons(ETH_TYPE_ARP)) { >@@ -2055,10 +2224,10 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet, > } > if (arpKey->nwProto == ARPOP_REQUEST > || arpKey->nwProto == ARPOP_REPLY) { >- memcpy(&arpKey->nwSrc, arp->arp_spa, 4); >- memcpy(&arpKey->nwDst, arp->arp_tpa, 4); >- memcpy(arpKey->arpSha, arp->arp_sha, ETH_ADDR_LENGTH); >- memcpy(arpKey->arpTha, arp->arp_tha, ETH_ADDR_LENGTH); >+ RtlCopyMemory(&arpKey->nwSrc, arp->arp_spa, 4); >+ RtlCopyMemory(&arpKey->nwDst, arp->arp_tpa, 4); >+ RtlCopyMemory(arpKey->arpSha, arp->arp_sha, >ETH_ADDR_LENGTH); >+ RtlCopyMemory(arpKey->arpTha, arp->arp_tha, >ETH_ADDR_LENGTH); > } > } > } else if (OvsEthertypeIsMpls(flow->l2.dlType)) { >diff --git a/datapath-windows/ovsext/Flow.h >b/datapath-windows/ovsext/Flow.h >index fb3fb59..d39db45 100644 >--- a/datapath-windows/ovsext/Flow.h >+++ b/datapath-windows/ovsext/Flow.h >@@ -53,6 +53,8 @@ NDIS_STATUS OvsAllocateFlowTable(OVS_DATAPATH *datapath, > > NDIS_STATUS OvsGetFlowMetadata(OvsFlowKey *key, > 
PNL_ATTR *keyAttrs); >+NDIS_STATUS OvsExtractLayers(const NET_BUFFER_LIST *packet, >+ POVS_PACKET_HDR_INFO layers); > NDIS_STATUS OvsExtractFlow(const NET_BUFFER_LIST *pkt, UINT32 inPort, > OvsFlowKey *flow, POVS_PACKET_HDR_INFO layers, > OvsIPv4TunnelKey *tunKey); >diff --git a/datapath-windows/ovsext/PacketParser.c >b/datapath-windows/ovsext/PacketParser.c >index 93df342..c4a04d0 100644 >--- a/datapath-windows/ovsext/PacketParser.c >+++ b/datapath-windows/ovsext/PacketParser.c >@@ -84,14 +84,13 @@ OvsGetPacketBytes(const NET_BUFFER_LIST *nbl, > > NDIS_STATUS > OvsParseIPv6(const NET_BUFFER_LIST *packet, >- OvsFlowKey *key, >+ Ipv6Key *ipv6Key, > POVS_PACKET_HDR_INFO layers) > { > UINT16 ofs = layers->l3Offset; > IPv6Hdr ipv6HdrStorage; > const IPv6Hdr *nh; > UINT32 nextHdr; >- Ipv6Key *flow= &key->ipv6Key; > > nh = OvsGetPacketBytes(packet, sizeof *nh, ofs, &ipv6HdrStorage); > if (!nh) { >@@ -99,15 +98,15 @@ OvsParseIPv6(const NET_BUFFER_LIST *packet, > } > > nextHdr = nh->nexthdr; >- memcpy(&flow->ipv6Src, nh->saddr.s6_addr, 16); >- memcpy(&flow->ipv6Dst, nh->daddr.s6_addr, 16); >+ RtlCopyMemory(&ipv6Key->ipv6Src, nh->saddr.s6_addr, 16); >+ RtlCopyMemory(&ipv6Key->ipv6Dst, nh->daddr.s6_addr, 16); > >- flow->nwTos = ((nh->flow_lbl[0] & 0xF0) >> 4) | (nh->priority << 4); >- flow->ipv6Label = >+ ipv6Key->nwTos = ((nh->flow_lbl[0] & 0xF0) >> 4) | (nh->priority << >4); >+ ipv6Key->ipv6Label = > ((nh->flow_lbl[0] & 0x0F) << 16) | (nh->flow_lbl[1] << 8) | >nh->flow_lbl[2]; >- flow->nwTtl = nh->hop_limit; >- flow->nwProto = SOCKET_IPPROTO_NONE; >- flow->nwFrag = OVS_FRAG_TYPE_NONE; >+ ipv6Key->nwTtl = nh->hop_limit; >+ ipv6Key->nwProto = SOCKET_IPPROTO_NONE; >+ ipv6Key->nwFrag = OVS_FRAG_TYPE_NONE; > > // Parse extended headers and compute L4 offset > ofs += sizeof(IPv6Hdr); >@@ -160,9 +159,9 @@ OvsParseIPv6(const NET_BUFFER_LIST *packet, > /* We only process the first fragment. 
*/ > if (fragHdr->offlg != htons(0)) { > if ((fragHdr->offlg & IP6F_OFF_HOST_ORDER_MASK) == >htons(0)) { >- flow->nwFrag = OVS_FRAG_TYPE_FIRST; >+ ipv6Key->nwFrag = OVS_FRAG_TYPE_FIRST; > } else { >- flow->nwFrag = OVS_FRAG_TYPE_LATER; >+ ipv6Key->nwFrag = OVS_FRAG_TYPE_LATER; > nextHdr = SOCKET_IPPROTO_FRAGMENT; > break; > } >@@ -170,7 +169,7 @@ OvsParseIPv6(const NET_BUFFER_LIST *packet, > } > } > >- flow->nwProto = (UINT8)nextHdr; >+ ipv6Key->nwProto = (UINT8)nextHdr; > layers->l4Offset = ofs; > return NDIS_STATUS_SUCCESS; > } >@@ -183,10 +182,14 @@ OvsParseTcp(const NET_BUFFER_LIST *packet, > TCPHdr tcpStorage; > const TCPHdr *tcp = OvsGetTcp(packet, layers->l4Offset, &tcpStorage); > if (tcp) { >- flow->tpSrc = tcp->source; >- flow->tpDst = tcp->dest; >- layers->isTcp = 1; >- layers->l7Offset = layers->l4Offset + 4 * tcp->doff; >+ if (flow) { >+ flow->tpSrc = tcp->source; >+ flow->tpDst = tcp->dest; >+ } >+ if (layers) { >+ layers->isTcp = 1; >+ layers->l7Offset = layers->l4Offset + 4 * tcp->doff; >+ } > } > } > >@@ -198,10 +201,14 @@ OvsParseSctp(const NET_BUFFER_LIST *packet, > SCTPHdr sctpStorage; > const SCTPHdr *sctp = OvsGetSctp(packet, layers->l4Offset, >&sctpStorage); > if (sctp) { >- flow->tpSrc = sctp->source; >- flow->tpDst = sctp->dest; >- layers->isSctp = 1; >- layers->l7Offset = layers->l4Offset + sizeof *sctp; >+ if (flow) { >+ flow->tpSrc = sctp->source; >+ flow->tpDst = sctp->dest; >+ } >+ if (layers) { >+ layers->isSctp = 1; >+ layers->l7Offset = layers->l4Offset + sizeof *sctp; >+ } > } > } > >@@ -213,29 +220,33 @@ OvsParseUdp(const NET_BUFFER_LIST *packet, > UDPHdr udpStorage; > const UDPHdr *udp = OvsGetUdp(packet, layers->l4Offset, &udpStorage); > if (udp) { >- flow->tpSrc = udp->source; >- flow->tpDst = udp->dest; >- layers->isUdp = 1; >- if (udp->check == 0) { >- layers->udpCsumZero = 1; >+ if (flow) { >+ flow->tpSrc = udp->source; >+ flow->tpDst = udp->dest; >+ } >+ if (layers) { >+ layers->isUdp = 1; >+ if (udp->check == 0) { >+ 
layers->udpCsumZero = 1; >+ } >+ layers->l7Offset = layers->l4Offset + sizeof *udp; > } >- layers->l7Offset = layers->l4Offset + sizeof *udp; > } > } > > NDIS_STATUS > OvsParseIcmpV6(const NET_BUFFER_LIST *packet, >- OvsFlowKey *key, >- POVS_PACKET_HDR_INFO layers) >+ Ipv6Key *ipv6Key, >+ Icmp6Key *icmp6Key, >+ POVS_PACKET_HDR_INFO layers) > { > UINT16 ofs = layers->l4Offset; > ICMPHdr icmpStorage; > const ICMPHdr *icmp; >- Icmp6Key *flow = &key->icmp6Key; > >- memset(&flow->ndTarget, 0, sizeof(flow->ndTarget)); >- memset(flow->arpSha, 0, sizeof(flow->arpSha)); >- memset(flow->arpTha, 0, sizeof(flow->arpTha)); >+ memset(&icmp6Key->ndTarget, 0, sizeof(icmp6Key->ndTarget)); >+ memset(icmp6Key->arpSha, 0, sizeof(icmp6Key->arpSha)); >+ memset(icmp6Key->arpTha, 0, sizeof(icmp6Key->arpTha)); > > icmp = OvsGetIcmp(packet, ofs, &icmpStorage); > if (!icmp) { >@@ -247,8 +258,10 @@ OvsParseIcmpV6(const NET_BUFFER_LIST *packet, > * The ICMPv6 type and code fields use the 16-bit transport port > * fields, so we need to store them in 16-bit network byte order. > */ >- key->ipv6Key.l4.tpSrc = htons(icmp->type); >- key->ipv6Key.l4.tpDst = htons(icmp->code); >+ if (ipv6Key) { >+ ipv6Key->l4.tpSrc = htons(icmp->type); >+ ipv6Key->l4.tpDst = htons(icmp->code); >+ } > > if (icmp->code == 0 && > (icmp->type == ND_NEIGHBOR_SOLICIT || >@@ -261,7 +274,7 @@ OvsParseIcmpV6(const NET_BUFFER_LIST *packet, > if (!ndTarget) { > return NDIS_STATUS_FAILURE; > } >- flow->ndTarget = *ndTarget; >+ icmp6Key->ndTarget = *ndTarget; > > while ((UINT32)(ofs + 8) <= OvsPacketLenNBL(packet)) { > /* >@@ -288,14 +301,14 @@ OvsParseIcmpV6(const NET_BUFFER_LIST *packet, > * layer option is specified twice. 
> */ > if (ndOpt->type == ND_OPT_SOURCE_LINKADDR && optLen == 8) { >- if (Eth_IsNullAddr(flow->arpSha)) { >- memcpy(flow->arpSha, ndOpt + 1, ETH_ADDR_LENGTH); >+ if (Eth_IsNullAddr(icmp6Key->arpSha)) { >+ memcpy(icmp6Key->arpSha, ndOpt + 1, ETH_ADDR_LENGTH); > } else { > goto invalid; > } > } else if (ndOpt->type == ND_OPT_TARGET_LINKADDR && optLen >== 8) { >- if (Eth_IsNullAddr(flow->arpTha)) { >- memcpy(flow->arpTha, ndOpt + 1, ETH_ADDR_LENGTH); >+ if (Eth_IsNullAddr(icmp6Key->arpTha)) { >+ memcpy(icmp6Key->arpTha, ndOpt + 1, ETH_ADDR_LENGTH); > } else { > goto invalid; > } >@@ -309,9 +322,9 @@ OvsParseIcmpV6(const NET_BUFFER_LIST *packet, > return NDIS_STATUS_SUCCESS; > > invalid: >- memset(&flow->ndTarget, 0, sizeof(flow->ndTarget)); >- memset(flow->arpSha, 0, sizeof(flow->arpSha)); >- memset(flow->arpTha, 0, sizeof(flow->arpTha)); >+ RtlZeroMemory(&icmp6Key->ndTarget, sizeof(icmp6Key->ndTarget)); >+ RtlZeroMemory(icmp6Key->arpSha, sizeof(icmp6Key->arpSha)); >+ RtlZeroMemory(icmp6Key->arpTha, sizeof(icmp6Key->arpTha)); > > return NDIS_STATUS_FAILURE; > } >diff --git a/datapath-windows/ovsext/PacketParser.h >b/datapath-windows/ovsext/PacketParser.h >index 47d227f..f1d7f28 100644 >--- a/datapath-windows/ovsext/PacketParser.h >+++ b/datapath-windows/ovsext/PacketParser.h >@@ -22,7 +22,7 @@ > > const VOID* OvsGetPacketBytes(const NET_BUFFER_LIST *_pNB, UINT32 len, > UINT32 SrcOffset, VOID *storage); >-NDIS_STATUS OvsParseIPv6(const NET_BUFFER_LIST *packet, OvsFlowKey *key, >+NDIS_STATUS OvsParseIPv6(const NET_BUFFER_LIST *packet, Ipv6Key *key, > POVS_PACKET_HDR_INFO layers); > VOID OvsParseTcp(const NET_BUFFER_LIST *packet, L4Key *flow, > POVS_PACKET_HDR_INFO layers); >@@ -30,8 +30,10 @@ VOID OvsParseUdp(const NET_BUFFER_LIST *packet, L4Key >*flow, > POVS_PACKET_HDR_INFO layers); > VOID OvsParseSctp(const NET_BUFFER_LIST *packet, L4Key *flow, > POVS_PACKET_HDR_INFO layers); >-NDIS_STATUS OvsParseIcmpV6(const NET_BUFFER_LIST *packet, OvsFlowKey >*key, >- 
POVS_PACKET_HDR_INFO layers); >+NDIS_STATUS OvsParseIcmpV6(const NET_BUFFER_LIST *packet, >+ Ipv6Key *ipv6Key, >+ Icmp6Key *flow, >+ POVS_PACKET_HDR_INFO layers); > > static __inline ULONG > OvsPacketLenNBL(const NET_BUFFER_LIST *_pNB) >diff --git a/datapath-windows/ovsext/Stt.c b/datapath-windows/ovsext/Stt.c >index dd7bf92..c93db75 100644 >--- a/datapath-windows/ovsext/Stt.c >+++ b/datapath-windows/ovsext/Stt.c >@@ -194,7 +194,7 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport, > if (layers->isIPv4) { > IPHdr *ip = (IPHdr *)(bufferStart + layers->l3Offset); > if (!ip->tot_len) { >- ip->tot_len = htons(innerFrameLen - sizeof(EthHdr)); >+ ip->tot_len = htons(innerFrameLen - layers->l3Offset); > } > if (!ip->check) { > ip->check = IPChecksum((UINT8 *)ip, ip->ihl * 4, 0); >@@ -231,8 +231,8 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport, > * memory. > */ > curMdl = NET_BUFFER_CURRENT_MDL(curNb); >- ASSERT((int) (MmGetMdlByteCount(curMdl) - >NET_BUFFER_CURRENT_MDL_OFFSET(curNb)) >- >= (int) headRoom); >+ ASSERT((int) (MmGetMdlByteCount(curMdl) - >+ NET_BUFFER_CURRENT_MDL_OFFSET(curNb)) >= (int) headRoom); > > buf = (PUINT8) MmGetSystemAddressForMdlSafe(curMdl, LowPagePriority); > if (!buf) { >@@ -288,12 +288,12 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport, > /* Calculate pseudo header chksum */ > tcpChksumLen = sizeof(TCPHdr) + STT_HDR_LEN + innerFrameLen; > ASSERT(tcpChksumLen < 65535); >- outerTcpHdr->check = IPPseudoChecksum(&fwdInfo->srcIpAddr,(uint32 *) >&tunKey->dst, >- IPPROTO_TCP, (uint16) >tcpChksumLen); > sttHdr->version = 0; > > /* Set STT Header */ > sttHdr->flags = 0; >+ sttHdr->mss = 0; >+ sttHdr->l4Offset = 0; > if (innerPartialChecksum) { > sttHdr->flags |= STT_CSUM_PARTIAL; > if (layers->isIPv4) { >@@ -327,8 +327,22 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport, > NET_BUFFER_LIST_INFO(curNbl, > TcpIpChecksumNetBufferListInfo) = >csumInfo.Value; > >- UINT32 encapMss = OvsGetExternalMtu(switchContext) - sizeof(IPHdr) - >sizeof(TCPHdr); >+ UINT32 encapMss = 
OvsGetExternalMtu(switchContext) >+ - sizeof(IPHdr) >+ - sizeof(TCPHdr); > if (ipTotalLen > encapMss) { >+ /* For Windows LSO, the TCP pseudo checksum must contain Source >IP >+ * Address, Destination IP Address, and Protocol; the length of >the >+ * payload is excluded because the underlying miniport driver >and NIC >+ * generate TCP segments from the large packet that is passed >down by >+ * the TCP/IP transport, the transport does not know the size of >the >+ * TCP payload for each TCP segment and therefore cannot include >the >+ * TCP Length in the pseudo-header. >+ */ >+ outerTcpHdr->check = IPPseudoChecksum(&fwdInfo->srcIpAddr, >+ (uint32 *) &tunKey->dst, >+ IPPROTO_TCP, (uint16) 0); >+ > lsoInfo.Value = 0; > lsoInfo.LsoV2Transmit.TcpHeaderOffset = tcpHeaderOffset; > lsoInfo.LsoV2Transmit.MSS = encapMss; >@@ -336,6 +350,11 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport, > lsoInfo.LsoV2Transmit.IPVersion = >NDIS_TCP_LARGE_SEND_OFFLOAD_IPv4; > NET_BUFFER_LIST_INFO(curNbl, > TcpLargeSendNetBufferListInfo) = >lsoInfo.Value; >+ } else { >+ outerTcpHdr->check = IPPseudoChecksum(&fwdInfo->srcIpAddr, >+ (uint32 *) &tunKey->dst, >+ IPPROTO_TCP, >+ (uint16) tcpChksumLen); > } > > return STATUS_SUCCESS; >@@ -655,7 +674,8 @@ handle_error: > if (lastPacket) { > /* Retrieve the original STT header */ > NdisMoveMemory(newSttHdr, &pktFragEntry->sttHdr, sizeof >(SttHdr)); >- targetPNbl = OvsAllocateNBLFromBuffer(switchContext, >pktFragEntry->packetBuf, >+ targetPNbl = OvsAllocateNBLFromBuffer(switchContext, >+ pktFragEntry->packetBuf, > innerPacketLen); > > /* Delete this entry and free up the memory/ */ >@@ -668,16 +688,32 @@ handle_error: > return lastPacket ? targetPNbl : NULL; > } > >-VOID >-OvsDecapSetOffloads(PNET_BUFFER_LIST curNbl, SttHdr *sttHdr) >+ >+/* >+*------------------------------------------------------------------------ >---- >+* OvsDecapSetOffloads >+* Processes received STT header and sets >TcpIpChecksumNetBufferListInfo >+* accordingly. 
>+* For TCP packets with total length bigger than destination MSS it >+* populates TcpLargeSendNetBufferListInfo. >+* >+* Returns NDIS_STATUS_SUCCESS normally. >+* Fails only if packet data is invalid. >+* (e.g. if OvsExtractLayers() returns an error). >+*------------------------------------------------------------------------ >---- >+*/ >+NDIS_STATUS >+OvsDecapSetOffloads(PNET_BUFFER_LIST *curNbl, SttHdr *sttHdr) > { > if ((sttHdr->flags & STT_CSUM_VERIFIED) > || !(sttHdr->flags & STT_CSUM_PARTIAL)) { >- return; >+ return NDIS_STATUS_SUCCESS; > } > >- UINT8 protoType; >+ NDIS_STATUS status; > NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo; >+ UINT8 protoType; >+ > csumInfo.Value = 0; > csumInfo.Transmit.IpHeaderChecksum = 0; > csumInfo.Transmit.TcpHeaderOffset = sttHdr->l4Offset; >@@ -703,25 +739,66 @@ OvsDecapSetOffloads(PNET_BUFFER_LIST curNbl, SttHdr >*sttHdr) > csumInfo.Transmit.IsIPv6 = 1; > csumInfo.Transmit.UdpChecksum = 1; > } >- NET_BUFFER_LIST_INFO(curNbl, >+ NET_BUFFER_LIST_INFO(*curNbl, > TcpIpChecksumNetBufferListInfo) = >csumInfo.Value; > >- if (sttHdr->mss) { >+ if (sttHdr->mss && (sttHdr->flags & STT_PROTO_TCP)) { > NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO lsoInfo; >+ PMDL curMdl = NULL; >+ PNET_BUFFER curNb; >+ PUINT8 buf = NULL; >+ OVS_PACKET_HDR_INFO layers; >+ >+ status = OvsExtractLayers(*curNbl, &layers); >+ if (status != NDIS_STATUS_SUCCESS) { >+ return status; >+ } >+ >+ curNb = NET_BUFFER_LIST_FIRST_NB(*curNbl); >+ curMdl = NET_BUFFER_CURRENT_MDL(curNb); >+ >+ buf = (PUINT8)MmGetSystemAddressForMdlSafe(curMdl, >+ LowPagePriority); >+ buf += NET_BUFFER_CURRENT_MDL_OFFSET(curNb); >+ >+ // apply pseudo checksum on extracted packet >+ if (sttHdr->flags & STT_PROTO_IPV4) { >+ IPHdr *ipHdr; >+ TCPHdr *tcpHdr; >+ >+ ipHdr = (IPHdr *)(buf + layers.l3Offset); >+ tcpHdr = (TCPHdr *)(buf + layers.l4Offset); >+ >+ tcpHdr->check = IPPseudoChecksum(&ipHdr->saddr, >+ (uint32 *)&ipHdr->daddr, >+ IPPROTO_TCP, 0); >+ } else { >+ IPv6Hdr *ipHdr; 
>+ TCPHdr *tcpHdr; >+ >+ ipHdr = (IPv6Hdr *)(buf + layers.l3Offset); >+ tcpHdr = (TCPHdr *)(buf + layers.l4Offset); >+ >+ tcpHdr->check = IPv6PseudoChecksum((UINT32*)&ipHdr->saddr, >+ (UINT32*)&ipHdr->daddr, >+ IPPROTO_TCP, 0); >+ } >+ >+ // setup LSO > lsoInfo.Value = 0; > lsoInfo.LsoV2Transmit.TcpHeaderOffset = sttHdr->l4Offset; >- lsoInfo.LsoV2Transmit.MSS = ETH_DEFAULT_MTU >- - sizeof(IPHdr) >- - sizeof(TCPHdr); >+ lsoInfo.LsoV2Transmit.MSS = ntohs(sttHdr->mss); > lsoInfo.LsoV2Transmit.Type = NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE; > if (sttHdr->flags & STT_PROTO_IPV4) { > lsoInfo.LsoV2Transmit.IPVersion = >NDIS_TCP_LARGE_SEND_OFFLOAD_IPv4; > } else { > lsoInfo.LsoV2Transmit.IPVersion = >NDIS_TCP_LARGE_SEND_OFFLOAD_IPv6; > } >- NET_BUFFER_LIST_INFO(curNbl, >+ NET_BUFFER_LIST_INFO(*curNbl, > TcpLargeSendNetBufferListInfo) = >lsoInfo.Value; > } >+ >+ return NDIS_STATUS_SUCCESS; > } > > /* >@@ -736,15 +813,14 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext, > OvsIPv4TunnelKey *tunKey, > PNET_BUFFER_LIST *newNbl) > { >- NDIS_STATUS status = NDIS_STATUS_FAILURE; >- PNET_BUFFER curNb, newNb; >+ NDIS_STATUS status; >+ PNET_BUFFER curNb; > IPHdr *ipHdr; > char *ipBuf[sizeof(IPHdr)]; > SttHdr stt; > SttHdr *sttHdr; > char *sttBuf[STT_HDR_LEN]; > UINT32 advanceCnt, hdrLen; >- BOOLEAN isLsoPacket = FALSE; > > curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); > ASSERT(NET_BUFFER_NEXT_NB(curNb) == NULL); >@@ -767,7 +843,7 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext, > TCPHdr *tcp = (TCPHdr *)((PCHAR)ipHdr + ipHdr->ihl * 4); > > /* Skip IP & TCP headers */ >- hdrLen = sizeof(IPHdr) + sizeof(TCPHdr), >+ hdrLen = (ipHdr->ihl * 4) + (tcp->doff * 4); > NdisAdvanceNetBufferDataStart(curNb, hdrLen, FALSE, NULL); > advanceCnt += hdrLen; > >@@ -775,7 +851,7 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext, > UINT32 totalLen = (seq >> STT_SEQ_LEN_SHIFT); > UINT16 payloadLen = (UINT16)ntohs(ipHdr->tot_len) > - (ipHdr->ihl * 4) >- - (sizeof * tcp); >+ - (tcp->doff * 4); > > /* Check if 
incoming packet requires reassembly */ > if (totalLen != payloadLen) { >@@ -788,7 +864,6 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext, > } > > *newNbl = pNbl; >- isLsoPacket = TRUE; > } else { > /* STT Header */ > sttHdr = NdisGetDataBuffer(curNb, sizeof *sttHdr, >@@ -812,7 +887,6 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext, > OvsCompleteNBL(switchContext, *newNbl, TRUE); > return NDIS_STATUS_FAILURE; > } >- newNb = NET_BUFFER_LIST_FIRST_NB(*newNbl); > > ASSERT(sttHdr); > >@@ -826,7 +900,7 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext, > tunKey->pad = 0; > > /* Set Checksum and LSO offload flags */ >- OvsDecapSetOffloads(*newNbl, sttHdr); >+ OvsDecapSetOffloads(newNbl, sttHdr); > > return NDIS_STATUS_SUCCESS; > } >diff --git a/datapath-windows/ovsext/User.c >b/datapath-windows/ovsext/User.c >index 92a71e1..c7ac284 100644 >--- a/datapath-windows/ovsext/User.c >+++ b/datapath-windows/ovsext/User.c >@@ -768,7 +768,8 @@ OvsCreateAndAddPackets(PVOID userData, > NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO tsoInfo; > UINT32 packetLength; > >- tsoInfo.Value = NET_BUFFER_LIST_INFO(nbl, >TcpLargeSendNetBufferListInfo); >+ tsoInfo.Value = NET_BUFFER_LIST_INFO(nbl, >+ >TcpLargeSendNetBufferListInfo); > nb = NET_BUFFER_LIST_FIRST_NB(nbl); > packetLength = NET_BUFFER_DATA_LENGTH(nb); > >@@ -870,7 +871,8 @@ OvsCompletePacketHeader(UINT8 *packet, > (UINT32 >*)&ipHdr->DestinationAddress, > IPPROTO_TCP, >hdrInfoOut->l4PayLoad); > } else { >- PIPV6_HEADER ipv6Hdr = (PIPV6_HEADER)(packet + >hdrInfoIn->l3Offset); >+ PIPV6_HEADER ipv6Hdr = (PIPV6_HEADER)(packet + >+ hdrInfoIn->l3Offset); > hdrInfoOut->l4PayLoad = > (UINT16)(ntohs(ipv6Hdr->PayloadLength) + > hdrInfoIn->l3Offset + sizeof(IPV6_HEADER)- >@@ -884,9 +886,9 @@ OvsCompletePacketHeader(UINT8 *packet, > hdrInfoOut->tcpCsumNeeded = 1; > ovsUserStats.recalTcpCsum++; > } else if (!isRecv) { >- if (csumInfo.Transmit.TcpChecksum) { >+ if (hdrInfoIn->isTcp && csumInfo.Transmit.TcpChecksum) { > hdrInfoOut->tcpCsumNeeded 
= 1; >- } else if (csumInfo.Transmit.UdpChecksum) { >+ } else if (hdrInfoIn->isUdp && csumInfo.Transmit.UdpChecksum) { > hdrInfoOut->udpCsumNeeded = 1; > } > if (hdrInfoOut->tcpCsumNeeded || hdrInfoOut->udpCsumNeeded) { >@@ -896,7 +898,8 @@ OvsCompletePacketHeader(UINT8 *packet, > hdrInfoOut->tcpCsumNeeded ? IPPROTO_TCP : IPPROTO_UDP; > #endif > if (hdrInfoIn->isIPv4) { >- PIPV4_HEADER ipHdr = (PIPV4_HEADER)(packet + >hdrInfoIn->l3Offset); >+ PIPV4_HEADER ipHdr = (PIPV4_HEADER)(packet + >+ hdrInfoIn->l3Offset); > hdrInfoOut->l4PayLoad = >(UINT16)(ntohs(ipHdr->TotalLength) - > (ipHdr->HeaderLength << 2)); > #ifdef DBG >@@ -1004,8 +1007,8 @@ OvsCreateQueueNlPacket(PVOID userData, > csumInfo.Value = NET_BUFFER_LIST_INFO(nbl, >TcpIpChecksumNetBufferListInfo); > > if (isRecv && (csumInfo.Receive.TcpChecksumFailed || >- (csumInfo.Receive.UdpChecksumFailed && >!hdrInfo->udpCsumZero) || >- csumInfo.Receive.IpChecksumFailed)) { >+ (csumInfo.Receive.UdpChecksumFailed && >!hdrInfo->udpCsumZero) || >+ csumInfo.Receive.IpChecksumFailed)) { > OVS_LOG_INFO("Packet dropped due to checksum failure."); > ovsUserStats.dropDuetoChecksum++; > return NULL; >-- >2.7.2.windows.1 >_______________________________________________ >dev mailing list >dev@openvswitch.org >https://urldefense.proofpoint.com/v2/url?u=http-3A__openvswitch.org_mailma >n_listinfo_dev&d=CwIGaQ&c=Sqcl0Ez6M0X8aeM67LKIiDJAXVeAw-YihVMNtXt-uEs&r=Dc >ruz40PROJ40ROzSpxyQSLw6fcrOWpJgEcEmNR3JEQ&m=n7gnXMsR2UrNtQVxrxnTBjwnREGH51 >0CQQKFRzywjr8&s=ZrsjgTjpUOOE_2up2V7PwwHo9VOuqVWBzyGzA1tw6es&e=
diff --git a/datapath-windows/ovsext/Flow.c b/datapath-windows/ovsext/Flow.c index c2e0227..2a91855 100644 --- a/datapath-windows/ovsext/Flow.c +++ b/datapath-windows/ovsext/Flow.c @@ -1570,7 +1570,8 @@ _MapKeyAttrToFlowPut(PNL_ATTR *keyAttrs, ndKey = NlAttrGet(keyAttrs[OVS_KEY_ATTR_ND]); RtlCopyMemory(&icmp6FlowPutKey->ndTarget, - ndKey->nd_target, sizeof (icmp6FlowPutKey->ndTarget)); + ndKey->nd_target, + sizeof (icmp6FlowPutKey->ndTarget)); RtlCopyMemory(icmp6FlowPutKey->arpSha, ndKey->nd_sll, ETH_ADDR_LEN); RtlCopyMemory(icmp6FlowPutKey->arpTha, @@ -1600,8 +1601,10 @@ _MapKeyAttrToFlowPut(PNL_ATTR *keyAttrs, arpFlowPutKey->nwSrc = arpKey->arp_sip; arpFlowPutKey->nwDst = arpKey->arp_tip; - RtlCopyMemory(arpFlowPutKey->arpSha, arpKey->arp_sha, ETH_ADDR_LEN); - RtlCopyMemory(arpFlowPutKey->arpTha, arpKey->arp_tha, ETH_ADDR_LEN); + RtlCopyMemory(arpFlowPutKey->arpSha, arpKey->arp_sha, + ETH_ADDR_LEN); + RtlCopyMemory(arpFlowPutKey->arpTha, arpKey->arp_tha, + ETH_ADDR_LEN); /* Kernel datapath assumes 'arpFlowPutKey->nwProto' to be in host * order. */ arpFlowPutKey->nwProto = (UINT8)ntohs((arpKey->arp_op)); @@ -1850,29 +1853,195 @@ OvsGetFlowMetadata(OvsFlowKey *key, return status; } + /* - *---------------------------------------------------------------------------- - * Initializes 'flow' members from 'packet', 'skb_priority', 'tun_id', and - * 'ofp_in_port'. - * - * Initializes 'packet' header pointers as follows: - * - * - packet->l2 to the start of the Ethernet header. - * - * - packet->l3 to just past the Ethernet header, or just past the - * vlan_header if one is present, to the first byte of the payload of the - * Ethernet frame. - * - * - packet->l4 to just past the IPv4 header, if one is present and has a - * correct length, and otherwise NULL. - * - * - packet->l7 to just past the TCP, UDP, SCTP or ICMP header, if one is - * present and has a correct length, and otherwise NULL. - * - * Returns NDIS_STATUS_SUCCESS normally. 
Fails only if packet data cannot be accessed - * (e.g. if Pkt_CopyBytesOut() returns an error). - *---------------------------------------------------------------------------- - */ +*---------------------------------------------------------------------------- +* Initializes 'layers' members from 'packet' +* +* Initializes 'layers' header pointers as follows: +* +* - layers->l2 to the start of the Ethernet header. +* +* - layers->l3 to just past the Ethernet header, or just past the +* vlan_header if one is present, to the first byte of the payload of the +* Ethernet frame. +* +* - layers->l4 to just past the IPv4 header, if one is present and has a +* correct length, and otherwise NULL. +* +* - layers->l7 to just past the TCP, UDP, SCTP or ICMP header, if one is +* present and has a correct length, and otherwise NULL. +* +* - layers->isIPv4/isIPv6/isTcp/isUdp/isSctp based on the packet type +* +* Returns NDIS_STATUS_SUCCESS normally. +* Fails only if packet data cannot be accessed. +* (e.g. if OvsParseIPv6() returns an error). +*---------------------------------------------------------------------------- +*/ +NDIS_STATUS +OvsExtractLayers(const NET_BUFFER_LIST *packet, + POVS_PACKET_HDR_INFO layers) +{ + struct Eth_Header *eth; + UINT8 offset = 0; + PVOID vlanTagValue; + ovs_be16 dlType; + + layers->value = 0; + + /* Link layer. */ + eth = (Eth_Header *)GetStartAddrNBL((NET_BUFFER_LIST *)packet); + + /* + * vlan_tci. + */ + vlanTagValue = NET_BUFFER_LIST_INFO(packet, Ieee8021QNetBufferListInfo); + if (!vlanTagValue) { + if (eth->dix.typeNBO == ETH_TYPE_802_1PQ_NBO) { + offset = sizeof(Eth_802_1pq_Tag); + } + + /* + * XXX Please note after this point, src mac and dst mac should + * not be accessed through eth + */ + eth = (Eth_Header *)((UINT8 *)eth + offset); + } + + /* + * dl_type. + * + * XXX assume that at least the first + * 12 bytes of received packets are mapped. 
This code has the stronger + * assumption that at least the first 22 bytes of 'packet' is mapped (if my + * arithmetic is right). + */ + if (ETH_TYPENOT8023(eth->dix.typeNBO)) { + dlType = eth->dix.typeNBO; + layers->l3Offset = ETH_HEADER_LEN_DIX + offset; + } else if (OvsPacketLenNBL(packet) >= ETH_HEADER_LEN_802_3 && + eth->e802_3.llc.dsap == 0xaa && + eth->e802_3.llc.ssap == 0xaa && + eth->e802_3.llc.control == ETH_LLC_CONTROL_UFRAME && + eth->e802_3.snap.snapOrg[0] == 0x00 && + eth->e802_3.snap.snapOrg[1] == 0x00 && + eth->e802_3.snap.snapOrg[2] == 0x00) { + dlType = eth->e802_3.snap.snapType.typeNBO; + layers->l3Offset = ETH_HEADER_LEN_802_3 + offset; + } else { + dlType = htons(OVSWIN_DL_TYPE_NONE); + layers->l3Offset = ETH_HEADER_LEN_DIX + offset; + } + + /* Network layer. */ + if (dlType == htons(ETH_TYPE_IPV4)) { + struct IPHdr ip_storage; + const struct IPHdr *nh; + + layers->isIPv4 = 1; + nh = OvsGetIp(packet, layers->l3Offset, &ip_storage); + if (nh) { + layers->l4Offset = layers->l3Offset + nh->ihl * 4; + + if (!(nh->frag_off & htons(IP_OFFSET))) { + if (nh->protocol == SOCKET_IPPROTO_TCP) { + OvsParseTcp(packet, NULL, layers); + } else if (nh->protocol == SOCKET_IPPROTO_UDP) { + OvsParseUdp(packet, NULL, layers); + } else if (nh->protocol == SOCKET_IPPROTO_SCTP) { + OvsParseSctp(packet, NULL, layers); + } else if (nh->protocol == SOCKET_IPPROTO_ICMP) { + ICMPHdr icmpStorage; + const ICMPHdr *icmp; + + icmp = OvsGetIcmp(packet, layers->l4Offset, &icmpStorage); + if (icmp) { + layers->l7Offset = layers->l4Offset + sizeof *icmp; + } + } + } + } + } else if (dlType == htons(ETH_TYPE_IPV6)) { + NDIS_STATUS status; + Ipv6Key ipv6Key; + + status = OvsParseIPv6(packet, &ipv6Key, layers); + if (status != NDIS_STATUS_SUCCESS) { + return status; + } + layers->isIPv6 = 1; + + if (ipv6Key.nwProto == SOCKET_IPPROTO_TCP) { + OvsParseTcp(packet, &(ipv6Key.l4), layers); + } else if (ipv6Key.nwProto == SOCKET_IPPROTO_UDP) { + OvsParseUdp(packet, &(ipv6Key.l4), layers); 
+ } else if (ipv6Key.nwProto == SOCKET_IPPROTO_SCTP) { + OvsParseSctp(packet, &ipv6Key.l4, layers); + } else if (ipv6Key.nwProto == SOCKET_IPPROTO_ICMPV6) { + Icmp6Key icmp6Key; + OvsParseIcmpV6(packet, NULL, &icmp6Key, layers); + } + } else if (OvsEthertypeIsMpls(dlType)) { + MPLSHdr mplsStorage; + const MPLSHdr *mpls; + + /* + * In the presence of an MPLS label stack the end of the L2 + * header and the beginning of the L3 header differ. + * + * A network packet may contain multiple MPLS labels, but we + * are only interested in the topmost label stack entry. + * + * Advance network header to the beginning of the L3 header. + * layers->l3Offset corresponds to the end of the L2 header. + */ + for (UINT32 i = 0; i < FLOW_MAX_MPLS_LABELS; i++) { + mpls = OvsGetMpls(packet, layers->l3Offset, &mplsStorage); + if (!mpls) { + break; + } + + layers->l3Offset += MPLS_HLEN; + layers->l4Offset += MPLS_HLEN; + + if (mpls->lse & htonl(MPLS_BOS_MASK)) { + /* + * Bottom of Stack bit is set, which means there are no + * remaining MPLS labels in the packet. + */ + break; + } + } + } + + return NDIS_STATUS_SUCCESS; +} + +/* +*---------------------------------------------------------------------------- +* Initializes 'flow' members from 'packet', 'skb_priority', 'tun_id', and +* 'ofp_in_port'. +* +* Initializes 'packet' header pointers as follows: +* +* - packet->l2 to the start of the Ethernet header. +* +* - packet->l3 to just past the Ethernet header, or just past the +* vlan_header if one is present, to the first byte of the payload of the +* Ethernet frame. +* +* - packet->l4 to just past the IPv4 header, if one is present and has a +* correct length, and otherwise NULL. +* +* - packet->l7 to just past the TCP, UDP, SCTP or ICMP header, if one is +* present and has a correct length, and otherwise NULL. +* +* Returns NDIS_STATUS_SUCCESS normally. +* Fails only if packet data cannot be accessed. +* (e.g. if Pkt_CopyBytesOut() returns an error). 
+*---------------------------------------------------------------------------- +*/ NDIS_STATUS OvsExtractFlow(const NET_BUFFER_LIST *packet, UINT32 inPort, @@ -1904,8 +2073,8 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet, /* Link layer. */ eth = (Eth_Header *)GetStartAddrNBL((NET_BUFFER_LIST *)packet); - memcpy(flow->l2.dlSrc, eth->src, ETH_ADDR_LENGTH); - memcpy(flow->l2.dlDst, eth->dst, ETH_ADDR_LENGTH); + RtlCopyMemory(flow->l2.dlSrc, eth->src, ETH_ADDR_LENGTH); + RtlCopyMemory(flow->l2.dlDst, eth->dst, ETH_ADDR_LENGTH); /* * vlan_tci. @@ -1927,8 +2096,7 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet, flow->l2.vlanTci = 0; } /* - * XXX - * Please note after this point, src mac and dst mac should + * XXX Please note after this point, src mac and dst mac should * not be accessed through eth */ eth = (Eth_Header *)((UINT8 *)eth + offset); @@ -1959,7 +2127,8 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet, layers->l3Offset = ETH_HEADER_LEN_DIX + offset; } - flow->l2.keyLen = OVS_WIN_TUNNEL_KEY_SIZE + OVS_L2_KEY_SIZE - flow->l2.offset; + flow->l2.keyLen = OVS_WIN_TUNNEL_KEY_SIZE + OVS_L2_KEY_SIZE + - flow->l2.offset; /* Network layer. 
*/ if (flow->l2.dlType == htons(ETH_TYPE_IPV4)) { struct IPHdr ip_storage; @@ -2016,9 +2185,9 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet, } else if (flow->l2.dlType == htons(ETH_TYPE_IPV6)) { NDIS_STATUS status; flow->l2.keyLen += OVS_IPV6_KEY_SIZE; - status = OvsParseIPv6(packet, flow, layers); + status = OvsParseIPv6(packet, &flow->ipv6Key, layers); if (status != NDIS_STATUS_SUCCESS) { - memset(&flow->ipv6Key, 0, sizeof (Ipv6Key)); + RtlZeroMemory(&flow->ipv6Key, sizeof (Ipv6Key)); return status; } layers->isIPv6 = 1; @@ -2033,7 +2202,7 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet, } else if (flow->ipv6Key.nwProto == SOCKET_IPPROTO_SCTP) { OvsParseSctp(packet, &flow->ipv6Key.l4, layers); } else if (flow->ipv6Key.nwProto == SOCKET_IPPROTO_ICMPV6) { - OvsParseIcmpV6(packet, flow, layers); + OvsParseIcmpV6(packet, &flow->ipv6Key, &flow->icmp6Key, layers); flow->l2.keyLen += (OVS_ICMPV6_KEY_SIZE - OVS_IPV6_KEY_SIZE); } } else if (flow->l2.dlType == htons(ETH_TYPE_ARP)) { @@ -2055,10 +2224,10 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet, } if (arpKey->nwProto == ARPOP_REQUEST || arpKey->nwProto == ARPOP_REPLY) { - memcpy(&arpKey->nwSrc, arp->arp_spa, 4); - memcpy(&arpKey->nwDst, arp->arp_tpa, 4); - memcpy(arpKey->arpSha, arp->arp_sha, ETH_ADDR_LENGTH); - memcpy(arpKey->arpTha, arp->arp_tha, ETH_ADDR_LENGTH); + RtlCopyMemory(&arpKey->nwSrc, arp->arp_spa, 4); + RtlCopyMemory(&arpKey->nwDst, arp->arp_tpa, 4); + RtlCopyMemory(arpKey->arpSha, arp->arp_sha, ETH_ADDR_LENGTH); + RtlCopyMemory(arpKey->arpTha, arp->arp_tha, ETH_ADDR_LENGTH); } } } else if (OvsEthertypeIsMpls(flow->l2.dlType)) { diff --git a/datapath-windows/ovsext/Flow.h b/datapath-windows/ovsext/Flow.h index fb3fb59..d39db45 100644 --- a/datapath-windows/ovsext/Flow.h +++ b/datapath-windows/ovsext/Flow.h @@ -53,6 +53,8 @@ NDIS_STATUS OvsAllocateFlowTable(OVS_DATAPATH *datapath, NDIS_STATUS OvsGetFlowMetadata(OvsFlowKey *key, PNL_ATTR *keyAttrs); +NDIS_STATUS OvsExtractLayers(const NET_BUFFER_LIST 
*packet, + POVS_PACKET_HDR_INFO layers); NDIS_STATUS OvsExtractFlow(const NET_BUFFER_LIST *pkt, UINT32 inPort, OvsFlowKey *flow, POVS_PACKET_HDR_INFO layers, OvsIPv4TunnelKey *tunKey); diff --git a/datapath-windows/ovsext/PacketParser.c b/datapath-windows/ovsext/PacketParser.c index 93df342..c4a04d0 100644 --- a/datapath-windows/ovsext/PacketParser.c +++ b/datapath-windows/ovsext/PacketParser.c @@ -84,14 +84,13 @@ OvsGetPacketBytes(const NET_BUFFER_LIST *nbl, NDIS_STATUS OvsParseIPv6(const NET_BUFFER_LIST *packet, - OvsFlowKey *key, + Ipv6Key *ipv6Key, POVS_PACKET_HDR_INFO layers) { UINT16 ofs = layers->l3Offset; IPv6Hdr ipv6HdrStorage; const IPv6Hdr *nh; UINT32 nextHdr; - Ipv6Key *flow= &key->ipv6Key; nh = OvsGetPacketBytes(packet, sizeof *nh, ofs, &ipv6HdrStorage); if (!nh) { @@ -99,15 +98,15 @@ OvsParseIPv6(const NET_BUFFER_LIST *packet, } nextHdr = nh->nexthdr; - memcpy(&flow->ipv6Src, nh->saddr.s6_addr, 16); - memcpy(&flow->ipv6Dst, nh->daddr.s6_addr, 16); + RtlCopyMemory(&ipv6Key->ipv6Src, nh->saddr.s6_addr, 16); + RtlCopyMemory(&ipv6Key->ipv6Dst, nh->daddr.s6_addr, 16); - flow->nwTos = ((nh->flow_lbl[0] & 0xF0) >> 4) | (nh->priority << 4); - flow->ipv6Label = + ipv6Key->nwTos = ((nh->flow_lbl[0] & 0xF0) >> 4) | (nh->priority << 4); + ipv6Key->ipv6Label = ((nh->flow_lbl[0] & 0x0F) << 16) | (nh->flow_lbl[1] << 8) | nh->flow_lbl[2]; - flow->nwTtl = nh->hop_limit; - flow->nwProto = SOCKET_IPPROTO_NONE; - flow->nwFrag = OVS_FRAG_TYPE_NONE; + ipv6Key->nwTtl = nh->hop_limit; + ipv6Key->nwProto = SOCKET_IPPROTO_NONE; + ipv6Key->nwFrag = OVS_FRAG_TYPE_NONE; // Parse extended headers and compute L4 offset ofs += sizeof(IPv6Hdr); @@ -160,9 +159,9 @@ OvsParseIPv6(const NET_BUFFER_LIST *packet, /* We only process the first fragment. 
*/ if (fragHdr->offlg != htons(0)) { if ((fragHdr->offlg & IP6F_OFF_HOST_ORDER_MASK) == htons(0)) { - flow->nwFrag = OVS_FRAG_TYPE_FIRST; + ipv6Key->nwFrag = OVS_FRAG_TYPE_FIRST; } else { - flow->nwFrag = OVS_FRAG_TYPE_LATER; + ipv6Key->nwFrag = OVS_FRAG_TYPE_LATER; nextHdr = SOCKET_IPPROTO_FRAGMENT; break; } @@ -170,7 +169,7 @@ OvsParseIPv6(const NET_BUFFER_LIST *packet, } } - flow->nwProto = (UINT8)nextHdr; + ipv6Key->nwProto = (UINT8)nextHdr; layers->l4Offset = ofs; return NDIS_STATUS_SUCCESS; } @@ -183,10 +182,14 @@ OvsParseTcp(const NET_BUFFER_LIST *packet, TCPHdr tcpStorage; const TCPHdr *tcp = OvsGetTcp(packet, layers->l4Offset, &tcpStorage); if (tcp) { - flow->tpSrc = tcp->source; - flow->tpDst = tcp->dest; - layers->isTcp = 1; - layers->l7Offset = layers->l4Offset + 4 * tcp->doff; + if (flow) { + flow->tpSrc = tcp->source; + flow->tpDst = tcp->dest; + } + if (layers) { + layers->isTcp = 1; + layers->l7Offset = layers->l4Offset + 4 * tcp->doff; + } } } @@ -198,10 +201,14 @@ OvsParseSctp(const NET_BUFFER_LIST *packet, SCTPHdr sctpStorage; const SCTPHdr *sctp = OvsGetSctp(packet, layers->l4Offset, &sctpStorage); if (sctp) { - flow->tpSrc = sctp->source; - flow->tpDst = sctp->dest; - layers->isSctp = 1; - layers->l7Offset = layers->l4Offset + sizeof *sctp; + if (flow) { + flow->tpSrc = sctp->source; + flow->tpDst = sctp->dest; + } + if (layers) { + layers->isSctp = 1; + layers->l7Offset = layers->l4Offset + sizeof *sctp; + } } } @@ -213,29 +220,33 @@ OvsParseUdp(const NET_BUFFER_LIST *packet, UDPHdr udpStorage; const UDPHdr *udp = OvsGetUdp(packet, layers->l4Offset, &udpStorage); if (udp) { - flow->tpSrc = udp->source; - flow->tpDst = udp->dest; - layers->isUdp = 1; - if (udp->check == 0) { - layers->udpCsumZero = 1; + if (flow) { + flow->tpSrc = udp->source; + flow->tpDst = udp->dest; + } + if (layers) { + layers->isUdp = 1; + if (udp->check == 0) { + layers->udpCsumZero = 1; + } + layers->l7Offset = layers->l4Offset + sizeof *udp; } - layers->l7Offset = 
layers->l4Offset + sizeof *udp; } } NDIS_STATUS OvsParseIcmpV6(const NET_BUFFER_LIST *packet, - OvsFlowKey *key, - POVS_PACKET_HDR_INFO layers) + Ipv6Key *ipv6Key, + Icmp6Key *icmp6Key, + POVS_PACKET_HDR_INFO layers) { UINT16 ofs = layers->l4Offset; ICMPHdr icmpStorage; const ICMPHdr *icmp; - Icmp6Key *flow = &key->icmp6Key; - memset(&flow->ndTarget, 0, sizeof(flow->ndTarget)); - memset(flow->arpSha, 0, sizeof(flow->arpSha)); - memset(flow->arpTha, 0, sizeof(flow->arpTha)); + memset(&icmp6Key->ndTarget, 0, sizeof(icmp6Key->ndTarget)); + memset(icmp6Key->arpSha, 0, sizeof(icmp6Key->arpSha)); + memset(icmp6Key->arpTha, 0, sizeof(icmp6Key->arpTha)); icmp = OvsGetIcmp(packet, ofs, &icmpStorage); if (!icmp) { @@ -247,8 +258,10 @@ OvsParseIcmpV6(const NET_BUFFER_LIST *packet, * The ICMPv6 type and code fields use the 16-bit transport port * fields, so we need to store them in 16-bit network byte order. */ - key->ipv6Key.l4.tpSrc = htons(icmp->type); - key->ipv6Key.l4.tpDst = htons(icmp->code); + if (ipv6Key) { + ipv6Key->l4.tpSrc = htons(icmp->type); + ipv6Key->l4.tpDst = htons(icmp->code); + } if (icmp->code == 0 && (icmp->type == ND_NEIGHBOR_SOLICIT || @@ -261,7 +274,7 @@ OvsParseIcmpV6(const NET_BUFFER_LIST *packet, if (!ndTarget) { return NDIS_STATUS_FAILURE; } - flow->ndTarget = *ndTarget; + icmp6Key->ndTarget = *ndTarget; while ((UINT32)(ofs + 8) <= OvsPacketLenNBL(packet)) { /* @@ -288,14 +301,14 @@ OvsParseIcmpV6(const NET_BUFFER_LIST *packet, * layer option is specified twice. 
*/ if (ndOpt->type == ND_OPT_SOURCE_LINKADDR && optLen == 8) { - if (Eth_IsNullAddr(flow->arpSha)) { - memcpy(flow->arpSha, ndOpt + 1, ETH_ADDR_LENGTH); + if (Eth_IsNullAddr(icmp6Key->arpSha)) { + memcpy(icmp6Key->arpSha, ndOpt + 1, ETH_ADDR_LENGTH); } else { goto invalid; } } else if (ndOpt->type == ND_OPT_TARGET_LINKADDR && optLen == 8) { - if (Eth_IsNullAddr(flow->arpTha)) { - memcpy(flow->arpTha, ndOpt + 1, ETH_ADDR_LENGTH); + if (Eth_IsNullAddr(icmp6Key->arpTha)) { + memcpy(icmp6Key->arpTha, ndOpt + 1, ETH_ADDR_LENGTH); } else { goto invalid; } @@ -309,9 +322,9 @@ OvsParseIcmpV6(const NET_BUFFER_LIST *packet, return NDIS_STATUS_SUCCESS; invalid: - memset(&flow->ndTarget, 0, sizeof(flow->ndTarget)); - memset(flow->arpSha, 0, sizeof(flow->arpSha)); - memset(flow->arpTha, 0, sizeof(flow->arpTha)); + RtlZeroMemory(&icmp6Key->ndTarget, sizeof(icmp6Key->ndTarget)); + RtlZeroMemory(icmp6Key->arpSha, sizeof(icmp6Key->arpSha)); + RtlZeroMemory(icmp6Key->arpTha, sizeof(icmp6Key->arpTha)); return NDIS_STATUS_FAILURE; } diff --git a/datapath-windows/ovsext/PacketParser.h b/datapath-windows/ovsext/PacketParser.h index 47d227f..f1d7f28 100644 --- a/datapath-windows/ovsext/PacketParser.h +++ b/datapath-windows/ovsext/PacketParser.h @@ -22,7 +22,7 @@ const VOID* OvsGetPacketBytes(const NET_BUFFER_LIST *_pNB, UINT32 len, UINT32 SrcOffset, VOID *storage); -NDIS_STATUS OvsParseIPv6(const NET_BUFFER_LIST *packet, OvsFlowKey *key, +NDIS_STATUS OvsParseIPv6(const NET_BUFFER_LIST *packet, Ipv6Key *key, POVS_PACKET_HDR_INFO layers); VOID OvsParseTcp(const NET_BUFFER_LIST *packet, L4Key *flow, POVS_PACKET_HDR_INFO layers); @@ -30,8 +30,10 @@ VOID OvsParseUdp(const NET_BUFFER_LIST *packet, L4Key *flow, POVS_PACKET_HDR_INFO layers); VOID OvsParseSctp(const NET_BUFFER_LIST *packet, L4Key *flow, POVS_PACKET_HDR_INFO layers); -NDIS_STATUS OvsParseIcmpV6(const NET_BUFFER_LIST *packet, OvsFlowKey *key, - POVS_PACKET_HDR_INFO layers); +NDIS_STATUS OvsParseIcmpV6(const NET_BUFFER_LIST *packet, 
+ Ipv6Key *ipv6Key, + Icmp6Key *flow, + POVS_PACKET_HDR_INFO layers); static __inline ULONG OvsPacketLenNBL(const NET_BUFFER_LIST *_pNB) diff --git a/datapath-windows/ovsext/Stt.c b/datapath-windows/ovsext/Stt.c index dd7bf92..c93db75 100644 --- a/datapath-windows/ovsext/Stt.c +++ b/datapath-windows/ovsext/Stt.c @@ -194,7 +194,7 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport, if (layers->isIPv4) { IPHdr *ip = (IPHdr *)(bufferStart + layers->l3Offset); if (!ip->tot_len) { - ip->tot_len = htons(innerFrameLen - sizeof(EthHdr)); + ip->tot_len = htons(innerFrameLen - layers->l3Offset); } if (!ip->check) { ip->check = IPChecksum((UINT8 *)ip, ip->ihl * 4, 0); @@ -231,8 +231,8 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport, * memory. */ curMdl = NET_BUFFER_CURRENT_MDL(curNb); - ASSERT((int) (MmGetMdlByteCount(curMdl) - NET_BUFFER_CURRENT_MDL_OFFSET(curNb)) - >= (int) headRoom); + ASSERT((int) (MmGetMdlByteCount(curMdl) - + NET_BUFFER_CURRENT_MDL_OFFSET(curNb)) >= (int) headRoom); buf = (PUINT8) MmGetSystemAddressForMdlSafe(curMdl, LowPagePriority); if (!buf) { @@ -288,12 +288,12 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport, /* Calculate pseudo header chksum */ tcpChksumLen = sizeof(TCPHdr) + STT_HDR_LEN + innerFrameLen; ASSERT(tcpChksumLen < 65535); - outerTcpHdr->check = IPPseudoChecksum(&fwdInfo->srcIpAddr,(uint32 *) &tunKey->dst, - IPPROTO_TCP, (uint16) tcpChksumLen); sttHdr->version = 0; /* Set STT Header */ sttHdr->flags = 0; + sttHdr->mss = 0; + sttHdr->l4Offset = 0; if (innerPartialChecksum) { sttHdr->flags |= STT_CSUM_PARTIAL; if (layers->isIPv4) { @@ -327,8 +327,22 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport, NET_BUFFER_LIST_INFO(curNbl, TcpIpChecksumNetBufferListInfo) = csumInfo.Value; - UINT32 encapMss = OvsGetExternalMtu(switchContext) - sizeof(IPHdr) - sizeof(TCPHdr); + UINT32 encapMss = OvsGetExternalMtu(switchContext) + - sizeof(IPHdr) + - sizeof(TCPHdr); if (ipTotalLen > encapMss) { + /* For Windows LSO, the TCP pseudo checksum must contain Source IP + * Address, Destination IP 
Address, and Protocol; the length of the + * payload is excluded because the underlying miniport driver and NIC + * generate TCP segments from the large packet that is passed down by + * the TCP/IP transport, the transport does not know the size of the + * TCP payload for each TCP segment and therefore cannot include the + * TCP Length in the pseudo-header. + */ + outerTcpHdr->check = IPPseudoChecksum(&fwdInfo->srcIpAddr, + (uint32 *) &tunKey->dst, + IPPROTO_TCP, (uint16) 0); + lsoInfo.Value = 0; lsoInfo.LsoV2Transmit.TcpHeaderOffset = tcpHeaderOffset; lsoInfo.LsoV2Transmit.MSS = encapMss; @@ -336,6 +350,11 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport, lsoInfo.LsoV2Transmit.IPVersion = NDIS_TCP_LARGE_SEND_OFFLOAD_IPv4; NET_BUFFER_LIST_INFO(curNbl, TcpLargeSendNetBufferListInfo) = lsoInfo.Value; + } else { + outerTcpHdr->check = IPPseudoChecksum(&fwdInfo->srcIpAddr, + (uint32 *) &tunKey->dst, + IPPROTO_TCP, + (uint16) tcpChksumLen); } return STATUS_SUCCESS; @@ -655,7 +674,8 @@ handle_error: if (lastPacket) { /* Retrieve the original STT header */ NdisMoveMemory(newSttHdr, &pktFragEntry->sttHdr, sizeof (SttHdr)); - targetPNbl = OvsAllocateNBLFromBuffer(switchContext, pktFragEntry->packetBuf, + targetPNbl = OvsAllocateNBLFromBuffer(switchContext, + pktFragEntry->packetBuf, innerPacketLen); /* Delete this entry and free up the memory/ */ @@ -668,16 +688,32 @@ handle_error: return lastPacket ? targetPNbl : NULL; } -VOID -OvsDecapSetOffloads(PNET_BUFFER_LIST curNbl, SttHdr *sttHdr) + +/* +*---------------------------------------------------------------------------- +* OvsDecapSetOffloads +* Processes received STT header and sets TcpIpChecksumNetBufferListInfo +* accordingly. +* For TCP packets with total length bigger than destination MSS it +* populates TcpLargeSendNetBufferListInfo. +* +* Returns NDIS_STATUS_SUCCESS normally. +* Fails only if packet data is invalid. +* (e.g. if OvsExtractLayers() returns an error). 
+*---------------------------------------------------------------------------- +*/ +NDIS_STATUS +OvsDecapSetOffloads(PNET_BUFFER_LIST *curNbl, SttHdr *sttHdr) { if ((sttHdr->flags & STT_CSUM_VERIFIED) || !(sttHdr->flags & STT_CSUM_PARTIAL)) { - return; + return NDIS_STATUS_SUCCESS; } - UINT8 protoType; + NDIS_STATUS status; NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo; + UINT8 protoType; + csumInfo.Value = 0; csumInfo.Transmit.IpHeaderChecksum = 0; csumInfo.Transmit.TcpHeaderOffset = sttHdr->l4Offset; @@ -703,25 +739,66 @@ OvsDecapSetOffloads(PNET_BUFFER_LIST curNbl, SttHdr *sttHdr) csumInfo.Transmit.IsIPv6 = 1; csumInfo.Transmit.UdpChecksum = 1; } - NET_BUFFER_LIST_INFO(curNbl, + NET_BUFFER_LIST_INFO(*curNbl, TcpIpChecksumNetBufferListInfo) = csumInfo.Value; - if (sttHdr->mss) { + if (sttHdr->mss && (sttHdr->flags & STT_PROTO_TCP)) { NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO lsoInfo; + PMDL curMdl = NULL; + PNET_BUFFER curNb; + PUINT8 buf = NULL; + OVS_PACKET_HDR_INFO layers; + + status = OvsExtractLayers(*curNbl, &layers); + if (status != NDIS_STATUS_SUCCESS) { + return status; + } + + curNb = NET_BUFFER_LIST_FIRST_NB(*curNbl); + curMdl = NET_BUFFER_CURRENT_MDL(curNb); + + buf = (PUINT8)MmGetSystemAddressForMdlSafe(curMdl, + LowPagePriority); + buf += NET_BUFFER_CURRENT_MDL_OFFSET(curNb); + + // apply pseudo checksum on extracted packet + if (sttHdr->flags & STT_PROTO_IPV4) { + IPHdr *ipHdr; + TCPHdr *tcpHdr; + + ipHdr = (IPHdr *)(buf + layers.l3Offset); + tcpHdr = (TCPHdr *)(buf + layers.l4Offset); + + tcpHdr->check = IPPseudoChecksum(&ipHdr->saddr, + (uint32 *)&ipHdr->daddr, + IPPROTO_TCP, 0); + } else { + IPv6Hdr *ipHdr; + TCPHdr *tcpHdr; + + ipHdr = (IPv6Hdr *)(buf + layers.l3Offset); + tcpHdr = (TCPHdr *)(buf + layers.l4Offset); + + tcpHdr->check = IPv6PseudoChecksum((UINT32*)&ipHdr->saddr, + (UINT32*)&ipHdr->daddr, + IPPROTO_TCP, 0); + } + + // setup LSO lsoInfo.Value = 0; lsoInfo.LsoV2Transmit.TcpHeaderOffset = sttHdr->l4Offset; - 
lsoInfo.LsoV2Transmit.MSS = ETH_DEFAULT_MTU - - sizeof(IPHdr) - - sizeof(TCPHdr); + lsoInfo.LsoV2Transmit.MSS = ntohs(sttHdr->mss); lsoInfo.LsoV2Transmit.Type = NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE; if (sttHdr->flags & STT_PROTO_IPV4) { lsoInfo.LsoV2Transmit.IPVersion = NDIS_TCP_LARGE_SEND_OFFLOAD_IPv4; } else { lsoInfo.LsoV2Transmit.IPVersion = NDIS_TCP_LARGE_SEND_OFFLOAD_IPv6; } - NET_BUFFER_LIST_INFO(curNbl, + NET_BUFFER_LIST_INFO(*curNbl, TcpLargeSendNetBufferListInfo) = lsoInfo.Value; } + + return NDIS_STATUS_SUCCESS; } /* @@ -736,15 +813,14 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext, OvsIPv4TunnelKey *tunKey, PNET_BUFFER_LIST *newNbl) { - NDIS_STATUS status = NDIS_STATUS_FAILURE; - PNET_BUFFER curNb, newNb; + NDIS_STATUS status; + PNET_BUFFER curNb; IPHdr *ipHdr; char *ipBuf[sizeof(IPHdr)]; SttHdr stt; SttHdr *sttHdr; char *sttBuf[STT_HDR_LEN]; UINT32 advanceCnt, hdrLen; - BOOLEAN isLsoPacket = FALSE; curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); ASSERT(NET_BUFFER_NEXT_NB(curNb) == NULL); @@ -767,7 +843,7 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext, TCPHdr *tcp = (TCPHdr *)((PCHAR)ipHdr + ipHdr->ihl * 4); /* Skip IP & TCP headers */ - hdrLen = sizeof(IPHdr) + sizeof(TCPHdr), + hdrLen = (ipHdr->ihl * 4) + (tcp->doff * 4); NdisAdvanceNetBufferDataStart(curNb, hdrLen, FALSE, NULL); advanceCnt += hdrLen; @@ -775,7 +851,7 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext, UINT32 totalLen = (seq >> STT_SEQ_LEN_SHIFT); UINT16 payloadLen = (UINT16)ntohs(ipHdr->tot_len) - (ipHdr->ihl * 4) - - (sizeof * tcp); + - (tcp->doff * 4); /* Check if incoming packet requires reassembly */ if (totalLen != payloadLen) { @@ -788,7 +864,6 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext, } *newNbl = pNbl; - isLsoPacket = TRUE; } else { /* STT Header */ sttHdr = NdisGetDataBuffer(curNb, sizeof *sttHdr, @@ -812,7 +887,6 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext, OvsCompleteNBL(switchContext, *newNbl, TRUE); return NDIS_STATUS_FAILURE; } - newNb = 
NET_BUFFER_LIST_FIRST_NB(*newNbl); ASSERT(sttHdr); @@ -826,7 +900,7 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext, tunKey->pad = 0; /* Set Checksum and LSO offload flags */ - OvsDecapSetOffloads(*newNbl, sttHdr); + OvsDecapSetOffloads(newNbl, sttHdr); return NDIS_STATUS_SUCCESS; } diff --git a/datapath-windows/ovsext/User.c b/datapath-windows/ovsext/User.c index 92a71e1..c7ac284 100644 --- a/datapath-windows/ovsext/User.c +++ b/datapath-windows/ovsext/User.c @@ -768,7 +768,8 @@ OvsCreateAndAddPackets(PVOID userData, NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO tsoInfo; UINT32 packetLength; - tsoInfo.Value = NET_BUFFER_LIST_INFO(nbl, TcpLargeSendNetBufferListInfo); + tsoInfo.Value = NET_BUFFER_LIST_INFO(nbl, + TcpLargeSendNetBufferListInfo); nb = NET_BUFFER_LIST_FIRST_NB(nbl); packetLength = NET_BUFFER_DATA_LENGTH(nb); @@ -870,7 +871,8 @@ OvsCompletePacketHeader(UINT8 *packet, (UINT32 *)&ipHdr->DestinationAddress, IPPROTO_TCP, hdrInfoOut->l4PayLoad); } else { - PIPV6_HEADER ipv6Hdr = (PIPV6_HEADER)(packet + hdrInfoIn->l3Offset); + PIPV6_HEADER ipv6Hdr = (PIPV6_HEADER)(packet + + hdrInfoIn->l3Offset); hdrInfoOut->l4PayLoad = (UINT16)(ntohs(ipv6Hdr->PayloadLength) + hdrInfoIn->l3Offset + sizeof(IPV6_HEADER)- @@ -884,9 +886,9 @@ OvsCompletePacketHeader(UINT8 *packet, hdrInfoOut->tcpCsumNeeded = 1; ovsUserStats.recalTcpCsum++; } else if (!isRecv) { - if (csumInfo.Transmit.TcpChecksum) { + if (hdrInfoIn->isTcp && csumInfo.Transmit.TcpChecksum) { hdrInfoOut->tcpCsumNeeded = 1; - } else if (csumInfo.Transmit.UdpChecksum) { + } else if (hdrInfoIn->isUdp && csumInfo.Transmit.UdpChecksum) { hdrInfoOut->udpCsumNeeded = 1; } if (hdrInfoOut->tcpCsumNeeded || hdrInfoOut->udpCsumNeeded) { @@ -896,7 +898,8 @@ OvsCompletePacketHeader(UINT8 *packet, hdrInfoOut->tcpCsumNeeded ? 
IPPROTO_TCP : IPPROTO_UDP; #endif if (hdrInfoIn->isIPv4) { - PIPV4_HEADER ipHdr = (PIPV4_HEADER)(packet + hdrInfoIn->l3Offset); + PIPV4_HEADER ipHdr = (PIPV4_HEADER)(packet + + hdrInfoIn->l3Offset); hdrInfoOut->l4PayLoad = (UINT16)(ntohs(ipHdr->TotalLength) - (ipHdr->HeaderLength << 2)); #ifdef DBG @@ -1004,8 +1007,8 @@ OvsCreateQueueNlPacket(PVOID userData, csumInfo.Value = NET_BUFFER_LIST_INFO(nbl, TcpIpChecksumNetBufferListInfo); if (isRecv && (csumInfo.Receive.TcpChecksumFailed || - (csumInfo.Receive.UdpChecksumFailed && !hdrInfo->udpCsumZero) || - csumInfo.Receive.IpChecksumFailed)) { + (csumInfo.Receive.UdpChecksumFailed && !hdrInfo->udpCsumZero) || + csumInfo.Receive.IpChecksumFailed)) { OVS_LOG_INFO("Packet dropped due to checksum failure."); ovsUserStats.dropDuetoChecksum++; return NULL;
*Added OvsExtractLayers - populates only the layers field, without the unnecessary memory operations for the flow part. *If the flags in the STT header are 0, then force packet checksum calculation on receive. *Ensure the correct pseudo checksum is set for LSO on both send and receive. Linux includes the segment length in the TCP pseudo-checksum, conforming to RFC 793, but in the case of LSO Windows expects the pseudo-checksum to cover only the Source IP Address, Destination IP Address, and Protocol. *Fragment expiration on the rx side of STT was set to 30 seconds, but the correct timeout would be the TTL of the packet. Signed-off-by: Paul-Daniel Boca <pboca@cloudbasesolutions.com> --- V2: Use STT_ENTRY_TIMEOUT on STT reassembly. Small refactoring and added LSO comment with specific requirements. --- datapath-windows/ovsext/Flow.c | 243 ++++++++++++++++++++++++++++----- datapath-windows/ovsext/Flow.h | 2 + datapath-windows/ovsext/PacketParser.c | 97 +++++++------ datapath-windows/ovsext/PacketParser.h | 8 +- datapath-windows/ovsext/Stt.c | 124 +++++++++++++---- datapath-windows/ovsext/User.c | 17 ++- 6 files changed, 377 insertions(+), 114 deletions(-)