diff mbox

[RFC,v4,3/3] virtio-net rsc: support coalescing ipv6 tcp traffic

Message ID 1459711556-10273-4-git-send-email-wexu@redhat.com
State New
Headers show

Commit Message

Wei Xu April 3, 2016, 7:25 p.m. UTC
From: Wei Xu <wexu@redhat.com>

Most things work like ipv4, except for one significant difference between ipv4
and ipv6: the length field in the ipv4 header includes the header itself, while
the ipv6 one does not, which means ipv6 can carry a real '65535' payload.

Signed-off-by: Wei Xu <wexu@redhat.com>
---
 hw/net/virtio-net.c | 147 +++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 141 insertions(+), 6 deletions(-)

Comments

Jason Wang April 5, 2016, 2:50 a.m. UTC | #1
On 04/04/2016 03:25 AM, wexu@redhat.com wrote:
> From: Wei Xu <wexu@redhat.com>
>
> Most things like ipv4 except there is a significant difference between ipv4
> and ipv6, the fragment lenght in ipv4 header includes itself, while it's not

typo

> included for ipv6, thus means ipv6 can carry a real '65535' payload.
>
> Signed-off-by: Wei Xu <wexu@redhat.com>
> ---
>  hw/net/virtio-net.c | 147 +++++++++++++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 141 insertions(+), 6 deletions(-)
>
> diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
> index 81e8e71..2d09352 100644
> --- a/hw/net/virtio-net.c
> +++ b/hw/net/virtio-net.c
> @@ -50,6 +50,10 @@
>  /* header lenght value in ip header without option */
>  #define VIRTIO_NET_IP4_HEADER_LENGTH 5
>  
> +#define ETH_IP6_HDR_SZ (ETH_HDR_SZ + IP6_HDR_SZ)
> +#define VIRTIO_NET_IP6_ADDR_SIZE   32      /* ipv6 saddr + daddr */
> +#define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD
> +
>  /* Purge coalesced packets timer interval */
>  #define VIRTIO_NET_RSC_INTERVAL  300000
>  
> @@ -1725,6 +1729,25 @@ static void virtio_net_rsc_extract_unit4(NetRscChain *chain,
>      unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
>  }
>  
> +static void virtio_net_rsc_extract_unit6(NetRscChain *chain,
> +                                         const uint8_t *buf, NetRscUnit* unit)
> +{
> +    uint16_t hdr_len;
> +    struct ip6_header *ip6;
> +
> +    hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
> +    ip6 = (struct ip6_header *)(buf + hdr_len + sizeof(struct eth_header));
> +    unit->ip = ip6;
> +    unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
> +    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)\
> +                                        + sizeof(struct ip6_header));
> +    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
> +
> +    /* There is a difference between payload lenght in ipv4 and v6,
> +       ip header is excluded in ipv6 */
> +    unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
> +}
> +
>  static void virtio_net_rsc_ipv4_checksum(struct ip_header *ip)
>  {
>      uint32_t sum;
> @@ -1738,7 +1761,9 @@ static size_t virtio_net_rsc_drain_seg(NetRscChain *chain, NetRscSeg *seg)
>  {
>      int ret;
>  
> -    virtio_net_rsc_ipv4_checksum(seg->unit.ip);
> +    if ((chain->proto == ETH_P_IP) && seg->is_coalesced) {
> +        virtio_net_rsc_ipv4_checksum(seg->unit.ip);
> +    }

Why not introduce a proto-specific checksum function for the chain?

>      ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
>      QTAILQ_REMOVE(&chain->buffers, seg, next);
>      g_free(seg->buf);
> @@ -1804,7 +1829,18 @@ static void virtio_net_rsc_cache_buf(NetRscChain *chain, NetClientState *nc,
>      QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
>      chain->stat.cache++;
>  
> -    virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
> +    switch (chain->proto) {
> +    case ETH_P_IP:
> +        virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);

Another case for proto-specific callbacks, maybe?

> +        break;
> +
> +    case ETH_P_IPV6:
> +        virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
> +        break;
> +
> +    default:
> +        g_assert_not_reached();
> +    }
>  }
>  
>  static int32_t virtio_net_rsc_handle_ack(NetRscChain *chain, NetRscSeg *seg,
> @@ -1948,6 +1984,24 @@ static int32_t virtio_net_rsc_coalesce4(NetRscChain *chain, NetRscSeg *seg,
>      return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
>  }
>  
> +static int32_t virtio_net_rsc_coalesce6(NetRscChain *chain, NetRscSeg *seg,
> +                        const uint8_t *buf, size_t size, NetRscUnit *unit)
> +{
> +    struct ip6_header *ip1, *ip2;
> +
> +    ip1 = (struct ip6_header *)(unit->ip);
> +    ip2 = (struct ip6_header *)(seg->unit.ip);
> +    if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
> +        || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
> +        || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
> +        || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
> +            chain->stat.no_match++;
> +            return RSC_NO_MATCH;
> +    }
> +
> +    return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
> +}
> +
>  /* Pakcets with 'SYN' should bypass, other flag should be sent after drain
>   * to prevent out of order */
>  static int virtio_net_rsc_tcp_ctrl_check(NetRscChain *chain,
> @@ -1991,7 +2045,11 @@ static size_t virtio_net_rsc_do_coalesce(NetRscChain *chain, NetClientState *nc,
>      NetRscSeg *seg, *nseg;
>  
>      QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
> -        ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
> +        if (chain->proto == ETH_P_IP) {
> +            ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
> +        } else {
> +            ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);

Ditto.

> +        }
>  
>          if (ret == RSC_FINAL) {
>              if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
> @@ -2116,13 +2174,82 @@ static size_t virtio_net_rsc_receive4(void *opq, NetClientState* nc,
>      return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
>  }
>  
> +static int32_t virtio_net_rsc_sanity_check6(NetRscChain *chain,
> +                                            struct ip6_header *ip6,
> +                                            const uint8_t *buf, size_t size)
> +{
> +    uint16_t ip_len;
> +    uint16_t hdr_len;
> +
> +    hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
> +    if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
> +        + sizeof(tcp_header))) {
> +        return RSC_BYPASS;
> +    }
> +
> +    if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
> +        != IP_HEADER_VERSION_6) {
> +        return RSC_BYPASS;
> +    }
> +
> +    /* Both option and protocol is checked in this */
> +    if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
> +        chain->stat.bypass_not_tcp++;
> +        return RSC_BYPASS;
> +    }
> +
> +    ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
> +    if (ip_len < sizeof(struct tcp_header)
> +        || ip_len > (size - hdr_len - sizeof(struct eth_header)
> +                     - sizeof(struct ip6_header))) {
> +        chain->stat.ip_hacked++;
> +        return RSC_BYPASS;
> +    }
> +
> +    return RSC_WANT;
> +}
> +
> +static size_t virtio_net_rsc_receive6(void *opq, NetClientState* nc,
> +                                      const uint8_t *buf, size_t size)
> +{
> +    int32_t ret;
> +    uint16_t hdr_len;
> +    NetRscChain *chain;
> +    NetRscUnit unit;
> +
> +    chain = (NetRscChain *)opq;
> +    hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
> +    virtio_net_rsc_extract_unit6(chain, buf, &unit);
> +    if (RSC_WANT != virtio_net_rsc_sanity_check6(chain,
> +                                                 unit.ip, buf, size)) {
> +        return virtio_net_do_receive(nc, buf, size);
> +    }
> +
> +    ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
> +    if (ret == RSC_BYPASS) {
> +        return virtio_net_do_receive(nc, buf, size);
> +    } else if (ret == RSC_FINAL) {
> +        return virtio_net_rsc_drain_flow(chain, nc, buf, size,
> +                ((hdr_len + sizeof(struct eth_header)) + 8),
> +                VIRTIO_NET_IP6_ADDR_SIZE,
> +                hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header),
> +                VIRTIO_NET_TCP_PORT_SIZE);
> +    }
> +
> +    if (virtio_net_rsc_empty_cache(chain, nc, buf, size)) {
> +        return size;
> +    }
> +
> +    return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
> +}
> +
>  static NetRscChain *virtio_net_rsc_lookup_chain(VirtIONet * n,
>                                                  NetClientState *nc,
>                                                  uint16_t proto)
>  {
>      NetRscChain *chain;
>  
> -    if (proto != (uint16_t)ETH_P_IP) {
> +    if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
>          return NULL;
>      }
>  
> @@ -2135,7 +2262,11 @@ static NetRscChain *virtio_net_rsc_lookup_chain(VirtIONet * n,
>      chain = g_malloc(sizeof(*chain));
>      chain->n = n;
>      chain->proto = proto;
> -    chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
> +    if (proto == (uint16_t)ETH_P_IP) {
> +        chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
> +    } else {
> +        chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
> +    }
>      chain->drain_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
>                                        virtio_net_rsc_purge, chain);
>      memset(&chain->stat, 0, sizeof(chain->stat));
> @@ -2167,7 +2298,11 @@ static ssize_t virtio_net_rsc_receive(NetClientState *nc,
>          return virtio_net_do_receive(nc, buf, size);
>      } else {
>          chain->stat.received++;
> -        return virtio_net_rsc_receive4(chain, nc, buf, size);
> +        if (proto == (uint16_t)ETH_P_IP) {
> +            return virtio_net_rsc_receive4(chain, nc, buf, size);
> +        } else  {
> +            return virtio_net_rsc_receive6(chain, nc, buf, size);
> +        }
>      }
>  }
>
Wei Xu April 8, 2016, 7:06 a.m. UTC | #2
On 2016年04月05日 10:50, Jason Wang wrote:
>
> On 04/04/2016 03:25 AM, wexu@redhat.com wrote:
>> From: Wei Xu <wexu@redhat.com>
>>
>> Most things like ipv4 except there is a significant difference between ipv4
>> and ipv6, the fragment lenght in ipv4 header includes itself, while it's not
> typo
Thanks.
>
>> included for ipv6, thus means ipv6 can carry a real '65535' payload.
>>
>> Signed-off-by: Wei Xu <wexu@redhat.com>
>> ---
>>   hw/net/virtio-net.c | 147 +++++++++++++++++++++++++++++++++++++++++++++++++---
>>   1 file changed, 141 insertions(+), 6 deletions(-)
>>
>> diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
>> index 81e8e71..2d09352 100644
>> --- a/hw/net/virtio-net.c
>> +++ b/hw/net/virtio-net.c
>> @@ -50,6 +50,10 @@
>>   /* header lenght value in ip header without option */
>>   #define VIRTIO_NET_IP4_HEADER_LENGTH 5
>>   
>> +#define ETH_IP6_HDR_SZ (ETH_HDR_SZ + IP6_HDR_SZ)
>> +#define VIRTIO_NET_IP6_ADDR_SIZE   32      /* ipv6 saddr + daddr */
>> +#define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD
>> +
>>   /* Purge coalesced packets timer interval */
>>   #define VIRTIO_NET_RSC_INTERVAL  300000
>>   
>> @@ -1725,6 +1729,25 @@ static void virtio_net_rsc_extract_unit4(NetRscChain *chain,
>>       unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
>>   }
>>   
>> +static void virtio_net_rsc_extract_unit6(NetRscChain *chain,
>> +                                         const uint8_t *buf, NetRscUnit* unit)
>> +{
>> +    uint16_t hdr_len;
>> +    struct ip6_header *ip6;
>> +
>> +    hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
>> +    ip6 = (struct ip6_header *)(buf + hdr_len + sizeof(struct eth_header));
>> +    unit->ip = ip6;
>> +    unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
>> +    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)\
>> +                                        + sizeof(struct ip6_header));
>> +    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
>> +
>> +    /* There is a difference between payload lenght in ipv4 and v6,
>> +       ip header is excluded in ipv6 */
>> +    unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
>> +}
>> +
>>   static void virtio_net_rsc_ipv4_checksum(struct ip_header *ip)
>>   {
>>       uint32_t sum;
>> @@ -1738,7 +1761,9 @@ static size_t virtio_net_rsc_drain_seg(NetRscChain *chain, NetRscSeg *seg)
>>   {
>>       int ret;
>>   
>> -    virtio_net_rsc_ipv4_checksum(seg->unit.ip);
>> +    if ((chain->proto == ETH_P_IP) && seg->is_coalesced) {
>> +        virtio_net_rsc_ipv4_checksum(seg->unit.ip);
>> +    }
> Why not introduce proto specific checksum function for chain?
Since there are only 2 protocols to support, and very limited room for
extending this feature, mst suggested using direct calls in the v2 patch
to keep things simple, and I took that suggestion.
>
>>       ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
>>       QTAILQ_REMOVE(&chain->buffers, seg, next);
>>       g_free(seg->buf);
>> @@ -1804,7 +1829,18 @@ static void virtio_net_rsc_cache_buf(NetRscChain *chain, NetClientState *nc,
>>       QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
>>       chain->stat.cache++;
>>   
>> -    virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
>> +    switch (chain->proto) {
>> +    case ETH_P_IP:
>> +        virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
> Another call for proto specific callbacks maybe?
Same as above.
>
>> +        break;
>> +
>> +    case ETH_P_IPV6:
>> +        virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
>> +        break;
>> +
>> +    default:
>> +        g_assert_not_reached();
>> +    }
>>   }
>>   
>>   static int32_t virtio_net_rsc_handle_ack(NetRscChain *chain, NetRscSeg *seg,
>> @@ -1948,6 +1984,24 @@ static int32_t virtio_net_rsc_coalesce4(NetRscChain *chain, NetRscSeg *seg,
>>       return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
>>   }
>>   
>> +static int32_t virtio_net_rsc_coalesce6(NetRscChain *chain, NetRscSeg *seg,
>> +                        const uint8_t *buf, size_t size, NetRscUnit *unit)
>> +{
>> +    struct ip6_header *ip1, *ip2;
>> +
>> +    ip1 = (struct ip6_header *)(unit->ip);
>> +    ip2 = (struct ip6_header *)(seg->unit.ip);
>> +    if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
>> +        || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
>> +        || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
>> +        || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
>> +            chain->stat.no_match++;
>> +            return RSC_NO_MATCH;
>> +    }
>> +
>> +    return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
>> +}
>> +
>>   /* Pakcets with 'SYN' should bypass, other flag should be sent after drain
>>    * to prevent out of order */
>>   static int virtio_net_rsc_tcp_ctrl_check(NetRscChain *chain,
>> @@ -1991,7 +2045,11 @@ static size_t virtio_net_rsc_do_coalesce(NetRscChain *chain, NetClientState *nc,
>>       NetRscSeg *seg, *nseg;
>>   
>>       QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
>> -        ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
>> +        if (chain->proto == ETH_P_IP) {
>> +            ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
>> +        } else {
>> +            ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
> Ditto.
Ditto too:)
>
>> +        }
>>   
>>           if (ret == RSC_FINAL) {
>>               if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
>> @@ -2116,13 +2174,82 @@ static size_t virtio_net_rsc_receive4(void *opq, NetClientState* nc,
>>       return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
>>   }
>>   
>> +static int32_t virtio_net_rsc_sanity_check6(NetRscChain *chain,
>> +                                            struct ip6_header *ip6,
>> +                                            const uint8_t *buf, size_t size)
>> +{
>> +    uint16_t ip_len;
>> +    uint16_t hdr_len;
>> +
>> +    hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
>> +    if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
>> +        + sizeof(tcp_header))) {
>> +        return RSC_BYPASS;
>> +    }
>> +
>> +    if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
>> +        != IP_HEADER_VERSION_6) {
>> +        return RSC_BYPASS;
>> +    }
>> +
>> +    /* Both option and protocol is checked in this */
>> +    if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
>> +        chain->stat.bypass_not_tcp++;
>> +        return RSC_BYPASS;
>> +    }
>> +
>> +    ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
>> +    if (ip_len < sizeof(struct tcp_header)
>> +        || ip_len > (size - hdr_len - sizeof(struct eth_header)
>> +                     - sizeof(struct ip6_header))) {
>> +        chain->stat.ip_hacked++;
>> +        return RSC_BYPASS;
>> +    }
>> +
>> +    return RSC_WANT;
>> +}
>> +
>> +static size_t virtio_net_rsc_receive6(void *opq, NetClientState* nc,
>> +                                      const uint8_t *buf, size_t size)
>> +{
>> +    int32_t ret;
>> +    uint16_t hdr_len;
>> +    NetRscChain *chain;
>> +    NetRscUnit unit;
>> +
>> +    chain = (NetRscChain *)opq;
>> +    hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
>> +    virtio_net_rsc_extract_unit6(chain, buf, &unit);
>> +    if (RSC_WANT != virtio_net_rsc_sanity_check6(chain,
>> +                                                 unit.ip, buf, size)) {
>> +        return virtio_net_do_receive(nc, buf, size);
>> +    }
>> +
>> +    ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
>> +    if (ret == RSC_BYPASS) {
>> +        return virtio_net_do_receive(nc, buf, size);
>> +    } else if (ret == RSC_FINAL) {
>> +        return virtio_net_rsc_drain_flow(chain, nc, buf, size,
>> +                ((hdr_len + sizeof(struct eth_header)) + 8),
>> +                VIRTIO_NET_IP6_ADDR_SIZE,
>> +                hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header),
>> +                VIRTIO_NET_TCP_PORT_SIZE);
>> +    }
>> +
>> +    if (virtio_net_rsc_empty_cache(chain, nc, buf, size)) {
>> +        return size;
>> +    }
>> +
>> +    return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
>> +}
>> +
>>   static NetRscChain *virtio_net_rsc_lookup_chain(VirtIONet * n,
>>                                                   NetClientState *nc,
>>                                                   uint16_t proto)
>>   {
>>       NetRscChain *chain;
>>   
>> -    if (proto != (uint16_t)ETH_P_IP) {
>> +    if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
>>           return NULL;
>>       }
>>   
>> @@ -2135,7 +2262,11 @@ static NetRscChain *virtio_net_rsc_lookup_chain(VirtIONet * n,
>>       chain = g_malloc(sizeof(*chain));
>>       chain->n = n;
>>       chain->proto = proto;
>> -    chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
>> +    if (proto == (uint16_t)ETH_P_IP) {
>> +        chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
>> +    } else {
>> +        chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
>> +    }
>>       chain->drain_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
>>                                         virtio_net_rsc_purge, chain);
>>       memset(&chain->stat, 0, sizeof(chain->stat));
>> @@ -2167,7 +2298,11 @@ static ssize_t virtio_net_rsc_receive(NetClientState *nc,
>>           return virtio_net_do_receive(nc, buf, size);
>>       } else {
>>           chain->stat.received++;
>> -        return virtio_net_rsc_receive4(chain, nc, buf, size);
>> +        if (proto == (uint16_t)ETH_P_IP) {
>> +            return virtio_net_rsc_receive4(chain, nc, buf, size);
>> +        } else  {
>> +            return virtio_net_rsc_receive6(chain, nc, buf, size);
>> +        }
>>       }
>>   }
>>   
>
Jason Wang April 8, 2016, 7:27 a.m. UTC | #3
On 04/08/2016 03:06 PM, Wei Xu wrote:
>
>
> On 2016年04月05日 10:50, Jason Wang wrote:
>>
>> On 04/04/2016 03:25 AM, wexu@redhat.com wrote:
>>> From: Wei Xu <wexu@redhat.com>
>>>
>>> Most things like ipv4 except there is a significant difference
>>> between ipv4
>>> and ipv6, the fragment lenght in ipv4 header includes itself, while
>>> it's not
>> typo
> Thanks.
>>
>>> included for ipv6, thus means ipv6 can carry a real '65535' payload.
>>>
>>> Signed-off-by: Wei Xu <wexu@redhat.com>
>>> ---
>>>   hw/net/virtio-net.c | 147
>>> +++++++++++++++++++++++++++++++++++++++++++++++++---
>>>   1 file changed, 141 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
>>> index 81e8e71..2d09352 100644
>>> --- a/hw/net/virtio-net.c
>>> +++ b/hw/net/virtio-net.c
>>> @@ -50,6 +50,10 @@
>>>   /* header lenght value in ip header without option */
>>>   #define VIRTIO_NET_IP4_HEADER_LENGTH 5
>>>   +#define ETH_IP6_HDR_SZ (ETH_HDR_SZ + IP6_HDR_SZ)
>>> +#define VIRTIO_NET_IP6_ADDR_SIZE   32      /* ipv6 saddr + daddr */
>>> +#define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD
>>> +
>>>   /* Purge coalesced packets timer interval */
>>>   #define VIRTIO_NET_RSC_INTERVAL  300000
>>>   @@ -1725,6 +1729,25 @@ static void
>>> virtio_net_rsc_extract_unit4(NetRscChain *chain,
>>>       unit->payload = htons(*unit->ip_plen) - ip_hdrlen -
>>> unit->tcp_hdrlen;
>>>   }
>>>   +static void virtio_net_rsc_extract_unit6(NetRscChain *chain,
>>> +                                         const uint8_t *buf,
>>> NetRscUnit* unit)
>>> +{
>>> +    uint16_t hdr_len;
>>> +    struct ip6_header *ip6;
>>> +
>>> +    hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
>>> +    ip6 = (struct ip6_header *)(buf + hdr_len + sizeof(struct
>>> eth_header));
>>> +    unit->ip = ip6;
>>> +    unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
>>> +    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)\
>>> +                                        + sizeof(struct ip6_header));
>>> +    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000)
>>> >> 10;
>>> +
>>> +    /* There is a difference between payload lenght in ipv4 and v6,
>>> +       ip header is excluded in ipv6 */
>>> +    unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
>>> +}
>>> +
>>>   static void virtio_net_rsc_ipv4_checksum(struct ip_header *ip)
>>>   {
>>>       uint32_t sum;
>>> @@ -1738,7 +1761,9 @@ static size_t
>>> virtio_net_rsc_drain_seg(NetRscChain *chain, NetRscSeg *seg)
>>>   {
>>>       int ret;
>>>   -    virtio_net_rsc_ipv4_checksum(seg->unit.ip);
>>> +    if ((chain->proto == ETH_P_IP) && seg->is_coalesced) {
>>> +        virtio_net_rsc_ipv4_checksum(seg->unit.ip);
>>> +    }
>> Why not introduce proto specific checksum function for chain?
> Since there are only 2 protocols to be supported, and very limited
> extension for this feature, mst suggest to use direct call in v2 patch
> to make things simple, and i took it.

Have you tried my suggestion? I think it will actually simplify the
current code (by at least several lines).
Wei Xu April 8, 2016, 7:51 a.m. UTC | #4
On 2016年04月08日 15:27, Jason Wang wrote:
>
> On 04/08/2016 03:06 PM, Wei Xu wrote:
>>
>> On 2016年04月05日 10:50, Jason Wang wrote:
>>> On 04/04/2016 03:25 AM, wexu@redhat.com wrote:
>>>> From: Wei Xu <wexu@redhat.com>
>>>>
>>>> Most things like ipv4 except there is a significant difference
>>>> between ipv4
>>>> and ipv6, the fragment lenght in ipv4 header includes itself, while
>>>> it's not
>>> typo
>> Thanks.
>>>> included for ipv6, thus means ipv6 can carry a real '65535' payload.
>>>>
>>>> Signed-off-by: Wei Xu <wexu@redhat.com>
>>>> ---
>>>>    hw/net/virtio-net.c | 147
>>>> +++++++++++++++++++++++++++++++++++++++++++++++++---
>>>>    1 file changed, 141 insertions(+), 6 deletions(-)
>>>>
>>>> diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
>>>> index 81e8e71..2d09352 100644
>>>> --- a/hw/net/virtio-net.c
>>>> +++ b/hw/net/virtio-net.c
>>>> @@ -50,6 +50,10 @@
>>>>    /* header lenght value in ip header without option */
>>>>    #define VIRTIO_NET_IP4_HEADER_LENGTH 5
>>>>    +#define ETH_IP6_HDR_SZ (ETH_HDR_SZ + IP6_HDR_SZ)
>>>> +#define VIRTIO_NET_IP6_ADDR_SIZE   32      /* ipv6 saddr + daddr */
>>>> +#define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD
>>>> +
>>>>    /* Purge coalesced packets timer interval */
>>>>    #define VIRTIO_NET_RSC_INTERVAL  300000
>>>>    @@ -1725,6 +1729,25 @@ static void
>>>> virtio_net_rsc_extract_unit4(NetRscChain *chain,
>>>>        unit->payload = htons(*unit->ip_plen) - ip_hdrlen -
>>>> unit->tcp_hdrlen;
>>>>    }
>>>>    +static void virtio_net_rsc_extract_unit6(NetRscChain *chain,
>>>> +                                         const uint8_t *buf,
>>>> NetRscUnit* unit)
>>>> +{
>>>> +    uint16_t hdr_len;
>>>> +    struct ip6_header *ip6;
>>>> +
>>>> +    hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
>>>> +    ip6 = (struct ip6_header *)(buf + hdr_len + sizeof(struct
>>>> eth_header));
>>>> +    unit->ip = ip6;
>>>> +    unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
>>>> +    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)\
>>>> +                                        + sizeof(struct ip6_header));
>>>> +    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000)
>>>>>> 10;
>>>> +
>>>> +    /* There is a difference between payload lenght in ipv4 and v6,
>>>> +       ip header is excluded in ipv6 */
>>>> +    unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
>>>> +}
>>>> +
>>>>    static void virtio_net_rsc_ipv4_checksum(struct ip_header *ip)
>>>>    {
>>>>        uint32_t sum;
>>>> @@ -1738,7 +1761,9 @@ static size_t
>>>> virtio_net_rsc_drain_seg(NetRscChain *chain, NetRscSeg *seg)
>>>>    {
>>>>        int ret;
>>>>    -    virtio_net_rsc_ipv4_checksum(seg->unit.ip);
>>>> +    if ((chain->proto == ETH_P_IP) && seg->is_coalesced) {
>>>> +        virtio_net_rsc_ipv4_checksum(seg->unit.ip);
>>>> +    }
>>> Why not introduce proto specific checksum function for chain?
>> Since there are only 2 protocols to be supported, and very limited
>> extension for this feature, mst suggest to use direct call in v2 patch
>> to make things simple, and i took it.
> Have you tried with my suggestion? I think it will actually simplify the
> current code (at least several lines of codes).
ok, will give it a try.
diff mbox

Patch

diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 81e8e71..2d09352 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -50,6 +50,10 @@ 
 /* header lenght value in ip header without option */
 #define VIRTIO_NET_IP4_HEADER_LENGTH 5
 
+#define ETH_IP6_HDR_SZ (ETH_HDR_SZ + IP6_HDR_SZ)
+#define VIRTIO_NET_IP6_ADDR_SIZE   32      /* ipv6 saddr + daddr */
+#define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD
+
 /* Purge coalesced packets timer interval */
 #define VIRTIO_NET_RSC_INTERVAL  300000
 
@@ -1725,6 +1729,25 @@  static void virtio_net_rsc_extract_unit4(NetRscChain *chain,
     unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
 }
 
+static void virtio_net_rsc_extract_unit6(NetRscChain *chain,
+                                         const uint8_t *buf, NetRscUnit* unit)
+{
+    uint16_t hdr_len;
+    struct ip6_header *ip6;
+
+    hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
+    ip6 = (struct ip6_header *)(buf + hdr_len + sizeof(struct eth_header));
+    unit->ip = ip6;
+    unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
+    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)\
+                                        + sizeof(struct ip6_header));
+    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
+
+    /* There is a difference between payload lenght in ipv4 and v6,
+       ip header is excluded in ipv6 */
+    unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
+}
+
 static void virtio_net_rsc_ipv4_checksum(struct ip_header *ip)
 {
     uint32_t sum;
@@ -1738,7 +1761,9 @@  static size_t virtio_net_rsc_drain_seg(NetRscChain *chain, NetRscSeg *seg)
 {
     int ret;
 
-    virtio_net_rsc_ipv4_checksum(seg->unit.ip);
+    if ((chain->proto == ETH_P_IP) && seg->is_coalesced) {
+        virtio_net_rsc_ipv4_checksum(seg->unit.ip);
+    }
     ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
     QTAILQ_REMOVE(&chain->buffers, seg, next);
     g_free(seg->buf);
@@ -1804,7 +1829,18 @@  static void virtio_net_rsc_cache_buf(NetRscChain *chain, NetClientState *nc,
     QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
     chain->stat.cache++;
 
-    virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
+    switch (chain->proto) {
+    case ETH_P_IP:
+        virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
+        break;
+
+    case ETH_P_IPV6:
+        virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
+        break;
+
+    default:
+        g_assert_not_reached();
+    }
 }
 
 static int32_t virtio_net_rsc_handle_ack(NetRscChain *chain, NetRscSeg *seg,
@@ -1948,6 +1984,24 @@  static int32_t virtio_net_rsc_coalesce4(NetRscChain *chain, NetRscSeg *seg,
     return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
 }
 
+static int32_t virtio_net_rsc_coalesce6(NetRscChain *chain, NetRscSeg *seg,
+                        const uint8_t *buf, size_t size, NetRscUnit *unit)
+{
+    struct ip6_header *ip1, *ip2;
+
+    ip1 = (struct ip6_header *)(unit->ip);
+    ip2 = (struct ip6_header *)(seg->unit.ip);
+    if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
+        || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
+        || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
+        || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
+            chain->stat.no_match++;
+            return RSC_NO_MATCH;
+    }
+
+    return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
+}
+
 /* Pakcets with 'SYN' should bypass, other flag should be sent after drain
  * to prevent out of order */
 static int virtio_net_rsc_tcp_ctrl_check(NetRscChain *chain,
@@ -1991,7 +2045,11 @@  static size_t virtio_net_rsc_do_coalesce(NetRscChain *chain, NetClientState *nc,
     NetRscSeg *seg, *nseg;
 
     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
-        ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
+        if (chain->proto == ETH_P_IP) {
+            ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
+        } else {
+            ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
+        }
 
         if (ret == RSC_FINAL) {
             if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
@@ -2116,13 +2174,82 @@  static size_t virtio_net_rsc_receive4(void *opq, NetClientState* nc,
     return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
 }
 
+static int32_t virtio_net_rsc_sanity_check6(NetRscChain *chain,
+                                            struct ip6_header *ip6,
+                                            const uint8_t *buf, size_t size)
+{
+    uint16_t ip_len;
+    uint16_t hdr_len;
+
+    hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
+    if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
+        + sizeof(tcp_header))) {
+        return RSC_BYPASS;
+    }
+
+    if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
+        != IP_HEADER_VERSION_6) {
+        return RSC_BYPASS;
+    }
+
+    /* Both option and protocol is checked in this */
+    if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
+        chain->stat.bypass_not_tcp++;
+        return RSC_BYPASS;
+    }
+
+    ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
+    if (ip_len < sizeof(struct tcp_header)
+        || ip_len > (size - hdr_len - sizeof(struct eth_header)
+                     - sizeof(struct ip6_header))) {
+        chain->stat.ip_hacked++;
+        return RSC_BYPASS;
+    }
+
+    return RSC_WANT;
+}
+
+static size_t virtio_net_rsc_receive6(void *opq, NetClientState* nc,
+                                      const uint8_t *buf, size_t size)
+{
+    int32_t ret;
+    uint16_t hdr_len;
+    NetRscChain *chain;
+    NetRscUnit unit;
+
+    chain = (NetRscChain *)opq;
+    hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
+    virtio_net_rsc_extract_unit6(chain, buf, &unit);
+    if (RSC_WANT != virtio_net_rsc_sanity_check6(chain,
+                                                 unit.ip, buf, size)) {
+        return virtio_net_do_receive(nc, buf, size);
+    }
+
+    ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
+    if (ret == RSC_BYPASS) {
+        return virtio_net_do_receive(nc, buf, size);
+    } else if (ret == RSC_FINAL) {
+        return virtio_net_rsc_drain_flow(chain, nc, buf, size,
+                ((hdr_len + sizeof(struct eth_header)) + 8),
+                VIRTIO_NET_IP6_ADDR_SIZE,
+                hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header),
+                VIRTIO_NET_TCP_PORT_SIZE);
+    }
+
+    if (virtio_net_rsc_empty_cache(chain, nc, buf, size)) {
+        return size;
+    }
+
+    return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
+}
+
 static NetRscChain *virtio_net_rsc_lookup_chain(VirtIONet * n,
                                                 NetClientState *nc,
                                                 uint16_t proto)
 {
     NetRscChain *chain;
 
-    if (proto != (uint16_t)ETH_P_IP) {
+    if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
         return NULL;
     }
 
@@ -2135,7 +2262,11 @@  static NetRscChain *virtio_net_rsc_lookup_chain(VirtIONet * n,
     chain = g_malloc(sizeof(*chain));
     chain->n = n;
     chain->proto = proto;
-    chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
+    if (proto == (uint16_t)ETH_P_IP) {
+        chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
+    } else {
+        chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
+    }
     chain->drain_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                       virtio_net_rsc_purge, chain);
     memset(&chain->stat, 0, sizeof(chain->stat));
@@ -2167,7 +2298,11 @@  static ssize_t virtio_net_rsc_receive(NetClientState *nc,
         return virtio_net_do_receive(nc, buf, size);
     } else {
         chain->stat.received++;
-        return virtio_net_rsc_receive4(chain, nc, buf, size);
+        if (proto == (uint16_t)ETH_P_IP) {
+            return virtio_net_rsc_receive4(chain, nc, buf, size);
+        } else  {
+            return virtio_net_rsc_receive6(chain, nc, buf, size);
+        }
     }
 }