diff mbox series

[v3,2/2] hw/net: Added basic IPv6 software fragmentation

Message ID 20200716035325.1406919-2-andrew@daynix.com
State New
Headers show
Series [v3,1/2] hw/net: Added plen fix for IPv6 | expand

Commit Message

Andrew Melnichenko July 16, 2020, 3:53 a.m. UTC
From: Andrew <andrew@daynix.com>

The basic IPv6 fragmentation - adding 'frag' extension to
the packet, overall shares some logic with IPv4. It works,
but there are still issues with a combination of
extensions - in the future, it would require refactoring
work to implement workflow with IPv6 and extension.
"Jumbo option" isn't added yet, until
qemu supports packets greater than 64K.

Signed-off-by: Andrew Melnychenko <andrew@daynix.com>
---
 hw/net/net_tx_pkt.c |  7 ++--
 include/net/eth.h   | 14 +++++--
 net/eth.c           | 99 ++++++++++++++++++++++++++++++++++++++++++---
 3 files changed, 109 insertions(+), 11 deletions(-)

Comments

Jason Wang July 17, 2020, 6:05 a.m. UTC | #1
On 2020/7/16 上午11:53, andrew@daynix.com wrote:
> From: Andrew <andrew@daynix.com>
>
> The basic IPv6 fragmentation - adding 'frag' extension to
> the packet, overall shares some logic with IPv4. It works,
> but there are still issues with a combination of
> extensions - in the future, it would require refactoring
> work to implement workflow with IPv6 and extension.
> "Jumbo option" isn't added yet, until
> qemu supports packets greater than 64K.
>
> Signed-off-by: Andrew Melnychenko <andrew@daynix.com>


Queued for 5.2

Thanks


> ---
>   hw/net/net_tx_pkt.c |  7 ++--
>   include/net/eth.h   | 14 +++++--
>   net/eth.c           | 99 ++++++++++++++++++++++++++++++++++++++++++---
>   3 files changed, 109 insertions(+), 11 deletions(-)
>
> diff --git a/hw/net/net_tx_pkt.c b/hw/net/net_tx_pkt.c
> index 9560e4a49e..74044c6618 100644
> --- a/hw/net/net_tx_pkt.c
> +++ b/hw/net/net_tx_pkt.c
> @@ -589,10 +589,11 @@ static bool net_tx_pkt_do_sw_fragmentation(struct NetTxPkt *pkt,
>   
>           more_frags = (fragment_offset + fragment_len < pkt->payload_len);
>   
> -        eth_setup_ip4_fragmentation(l2_iov_base, l2_iov_len, l3_iov_base,
> -            l3_iov_len, fragment_len, fragment_offset, more_frags);
> +        eth_setup_ip_fragmentation(l2_iov_base, l2_iov_len, l3_iov_base,
> +            &l3_iov_len, ETH_MAX_IP_DGRAM_LEN,
> +            fragment_len, fragment_offset, more_frags);
>   
> -        eth_fix_ip4_checksum(l3_iov_base, l3_iov_len);
> +        fragment[NET_TX_PKT_FRAGMENT_L3_HDR_POS].iov_len = l3_iov_len;
>   
>           net_tx_pkt_sendv(pkt, nc, fragment, dst_idx);
>   
> diff --git a/include/net/eth.h b/include/net/eth.h
> index 0671be6916..05c75ac9fc 100644
> --- a/include/net/eth.h
> +++ b/include/net/eth.h
> @@ -139,6 +139,14 @@ struct ip6_ext_hdr_routing {
>       uint8_t     rsvd[4];
>   };
>   
> +struct ip6_ext_hdr_fragment {
> +    uint8_t     nxt;
> +    uint8_t     res0;
> +    uint16_t    off;
> +    uint32_t    id;
> +};
> +
> +
>   struct ip6_option_hdr {
>   #define IP6_OPT_PAD1   (0x00)
>   #define IP6_OPT_HOME   (0xC9)
> @@ -399,9 +407,9 @@ void eth_get_protocols(const struct iovec *iov, int iovcnt,
>                          eth_ip4_hdr_info *ip4hdr_info,
>                          eth_l4_hdr_info  *l4hdr_info);
>   
> -void eth_setup_ip4_fragmentation(const void *l2hdr, size_t l2hdr_len,
> -                                 void *l3hdr, size_t l3hdr_len,
> -                                 size_t l3payload_len,
> +void eth_setup_ip_fragmentation(const void *l2hdr, size_t l2hdr_len,
> +                                 void *l3hdr, size_t *l3hdr_len,
> +                                 size_t l3hdr_max_len, size_t l3payload_len,
>                                    size_t frag_offset, bool more_frags);
>   
>   void
> diff --git a/net/eth.c b/net/eth.c
> index 0c1d413ee2..067111526d 100644
> --- a/net/eth.c
> +++ b/net/eth.c
> @@ -314,10 +314,65 @@ eth_strip_vlan_ex(const struct iovec *iov, int iovcnt, size_t iovoff,
>       return 0;
>   }
>   
> +static bool eth_is_ip6_extension_header_type(uint8_t hdr_type);
> +
> +static void *eth_ip6_find_ext(struct ip6_header *ip6, uint8_t ext_type)
> +{
> +    uint8_t curr_ext_hdr_type = ip6->ip6_nxt;
> +    struct ip6_ext_hdr *ext_hdr = (struct ip6_ext_hdr *)(ip6 + 1);
> +    for (; eth_is_ip6_extension_header_type(curr_ext_hdr_type);) {
> +        if (curr_ext_hdr_type == ext_type) {
> +            return ext_hdr;
> +        }
> +        curr_ext_hdr_type = ext_hdr->ip6r_nxt;
> +        ext_hdr = (struct ip6_ext_hdr *)(((uint8_t *)ext_hdr)
> +                + (ext_hdr->ip6r_len + 1) * IP6_EXT_GRANULARITY);
> +    }
> +
> +    return NULL;
> +}
> +
> +/*
> + * To add an extension - there is should be
> + * enough memory 'behind' the ip6 header.
> + */
> +static void *eth_ip6_add_ext_nonsafe(struct ip6_header *ip6, uint8_t ext_type)
> +{
> +    uint8_t curr_ext_hdr_type = ip6->ip6_nxt;
> +    struct ip6_ext_hdr *ext_hdr = (struct ip6_ext_hdr *)(ip6 + 1);
> +    struct ip6_ext_hdr *ext_hdr_prev = NULL;
> +
> +    if (!eth_is_ip6_extension_header_type(curr_ext_hdr_type)) {
> +        ext_hdr->ip6r_nxt = ip6->ip6_nxt;
> +        ip6->ip6_nxt = ext_type;
> +        return ext_hdr;
> +    }
> +
> +    ext_hdr_prev = ext_hdr;
> +    curr_ext_hdr_type = ext_hdr->ip6r_nxt;
> +    ext_hdr = (struct ip6_ext_hdr *)(((uint8_t *)ext_hdr)
> +            + (ext_hdr->ip6r_len + 1) * IP6_EXT_GRANULARITY);
> +
> +    for (; eth_is_ip6_extension_header_type(curr_ext_hdr_type);) {
> +        ext_hdr_prev = ext_hdr;
> +        curr_ext_hdr_type = ext_hdr->ip6r_nxt;
> +        ext_hdr = (struct ip6_ext_hdr *)(((uint8_t *)ext_hdr)
> +                + (ext_hdr->ip6r_len + 1) * IP6_EXT_GRANULARITY);
> +    }
> +
> +    ext_hdr->ip6r_nxt = ext_hdr_prev->ip6r_nxt;
> +    ext_hdr_prev->ip6r_nxt = ext_type;
> +
> +    return ext_hdr;
> +}
> +
> +/* When IP6_FRAGMENT added, first 'id' would be 0x71656d75 */
> +static const uint32_t s_first_fragment_identificator = 0x71656d75; /* 'qemu' */
> +
>   void
> -eth_setup_ip4_fragmentation(const void *l2hdr, size_t l2hdr_len,
> -                            void *l3hdr, size_t l3hdr_len,
> -                            size_t l3payload_len,
> +eth_setup_ip_fragmentation(const void *l2hdr, size_t l2hdr_len,
> +                            void *l3hdr, size_t *l3hdr_len,
> +                            size_t l3hdr_max_len, size_t l3payload_len,
>                               size_t frag_offset, bool more_frags)
>   {
>       const struct iovec l2vec = {
> @@ -325,7 +380,9 @@ eth_setup_ip4_fragmentation(const void *l2hdr, size_t l2hdr_len,
>           .iov_len = l2hdr_len
>       };
>   
> -    if (eth_get_l3_proto(&l2vec, 1, l2hdr_len) == ETH_P_IP) {
> +    uint16_t l3_proto = eth_get_l3_proto(&l2vec, 1, l2hdr_len);
> +
> +    if (l3_proto == ETH_P_IP) {
>           uint16_t orig_flags;
>           struct ip_header *iphdr = (struct ip_header *) l3hdr;
>           uint16_t frag_off_units = frag_offset / IP_FRAG_UNIT_SIZE;
> @@ -337,7 +394,39 @@ eth_setup_ip4_fragmentation(const void *l2hdr, size_t l2hdr_len,
>           orig_flags = be16_to_cpu(iphdr->ip_off) & ~(IP_OFFMASK|IP_MF);
>           new_ip_off = frag_off_units | orig_flags  | (more_frags ? IP_MF : 0);
>           iphdr->ip_off = cpu_to_be16(new_ip_off);
> -        iphdr->ip_len = cpu_to_be16(l3payload_len + l3hdr_len);
> +        iphdr->ip_len = cpu_to_be16(l3payload_len + *l3hdr_len);
> +
> +        eth_fix_ip4_checksum(l3hdr, *l3hdr_len);
> +    } else if (l3_proto == ETH_P_IPV6) {
> +        struct ip6_header *ip6 = (struct ip6_header *) l3hdr;
> +
> +        struct ip6_ext_hdr_fragment *frag_ext = NULL;
> +
> +        /* Find frag extension */
> +        frag_ext = eth_ip6_find_ext(ip6, IP6_FRAGMENT);
> +        if (frag_ext == NULL) {
> +            /* No frag extension? Add one */
> +            if (*l3hdr_len + sizeof(*frag_ext) > l3hdr_max_len) {
> +                /*
> +                 * TODO:
> +                 * For now, qemu's L3 buffer allocated with NetTxPkt
> +                 * and have enough memory(buffer allocated with 65K bytes
> +                 * ETH_MAX_IP_DGRAM_LEN = 0xFFFF)
> +                 * for additional extensions.
> +                 */
> +                return; /* TODO: request to reallocate l3hdr */
> +            }
> +            frag_ext = eth_ip6_add_ext_nonsafe(ip6, IP6_FRAGMENT);
> +            *l3hdr_len += sizeof(*frag_ext);
> +            static uint32_t s_id = s_first_fragment_identificator;
> +            frag_ext->id = cpu_to_be32(s_id);
> +            ++s_id;
> +        }
> +
> +        frag_ext->off = cpu_to_be16((frag_offset / IP_FRAG_UNIT_SIZE) << 3
> +                | (uint16_t)!!more_frags);
> +
> +        ip6->ip6_plen = cpu_to_be16(l3payload_len + *l3hdr_len - sizeof(*ip6));
>       }
>   }
>
diff mbox series

Patch

diff --git a/hw/net/net_tx_pkt.c b/hw/net/net_tx_pkt.c
index 9560e4a49e..74044c6618 100644
--- a/hw/net/net_tx_pkt.c
+++ b/hw/net/net_tx_pkt.c
@@ -589,10 +589,11 @@  static bool net_tx_pkt_do_sw_fragmentation(struct NetTxPkt *pkt,
 
         more_frags = (fragment_offset + fragment_len < pkt->payload_len);
 
-        eth_setup_ip4_fragmentation(l2_iov_base, l2_iov_len, l3_iov_base,
-            l3_iov_len, fragment_len, fragment_offset, more_frags);
+        eth_setup_ip_fragmentation(l2_iov_base, l2_iov_len, l3_iov_base,
+            &l3_iov_len, ETH_MAX_IP_DGRAM_LEN,
+            fragment_len, fragment_offset, more_frags);
 
-        eth_fix_ip4_checksum(l3_iov_base, l3_iov_len);
+        fragment[NET_TX_PKT_FRAGMENT_L3_HDR_POS].iov_len = l3_iov_len;
 
         net_tx_pkt_sendv(pkt, nc, fragment, dst_idx);
 
diff --git a/include/net/eth.h b/include/net/eth.h
index 0671be6916..05c75ac9fc 100644
--- a/include/net/eth.h
+++ b/include/net/eth.h
@@ -139,6 +139,14 @@  struct ip6_ext_hdr_routing {
     uint8_t     rsvd[4];
 };
 
+struct ip6_ext_hdr_fragment {
+    uint8_t     nxt;
+    uint8_t     res0;
+    uint16_t    off;
+    uint32_t    id;
+};
+
+
 struct ip6_option_hdr {
 #define IP6_OPT_PAD1   (0x00)
 #define IP6_OPT_HOME   (0xC9)
@@ -399,9 +407,9 @@  void eth_get_protocols(const struct iovec *iov, int iovcnt,
                        eth_ip4_hdr_info *ip4hdr_info,
                        eth_l4_hdr_info  *l4hdr_info);
 
-void eth_setup_ip4_fragmentation(const void *l2hdr, size_t l2hdr_len,
-                                 void *l3hdr, size_t l3hdr_len,
-                                 size_t l3payload_len,
+void eth_setup_ip_fragmentation(const void *l2hdr, size_t l2hdr_len,
+                                 void *l3hdr, size_t *l3hdr_len,
+                                 size_t l3hdr_max_len, size_t l3payload_len,
                                  size_t frag_offset, bool more_frags);
 
 void
diff --git a/net/eth.c b/net/eth.c
index 0c1d413ee2..067111526d 100644
--- a/net/eth.c
+++ b/net/eth.c
@@ -314,10 +314,65 @@  eth_strip_vlan_ex(const struct iovec *iov, int iovcnt, size_t iovoff,
     return 0;
 }
 
+static bool eth_is_ip6_extension_header_type(uint8_t hdr_type);
+
+static void *eth_ip6_find_ext(struct ip6_header *ip6, uint8_t ext_type)
+{
+    uint8_t curr_ext_hdr_type = ip6->ip6_nxt;
+    struct ip6_ext_hdr *ext_hdr = (struct ip6_ext_hdr *)(ip6 + 1);
+    for (; eth_is_ip6_extension_header_type(curr_ext_hdr_type);) {
+        if (curr_ext_hdr_type == ext_type) {
+            return ext_hdr;
+        }
+        curr_ext_hdr_type = ext_hdr->ip6r_nxt;
+        ext_hdr = (struct ip6_ext_hdr *)(((uint8_t *)ext_hdr)
+                + (ext_hdr->ip6r_len + 1) * IP6_EXT_GRANULARITY);
+    }
+
+    return NULL;
+}
+
+/*
+ * To add an extension - there is should be
+ * enough memory 'behind' the ip6 header.
+ */
+static void *eth_ip6_add_ext_nonsafe(struct ip6_header *ip6, uint8_t ext_type)
+{
+    uint8_t curr_ext_hdr_type = ip6->ip6_nxt;
+    struct ip6_ext_hdr *ext_hdr = (struct ip6_ext_hdr *)(ip6 + 1);
+    struct ip6_ext_hdr *ext_hdr_prev = NULL;
+
+    if (!eth_is_ip6_extension_header_type(curr_ext_hdr_type)) {
+        ext_hdr->ip6r_nxt = ip6->ip6_nxt;
+        ip6->ip6_nxt = ext_type;
+        return ext_hdr;
+    }
+
+    ext_hdr_prev = ext_hdr;
+    curr_ext_hdr_type = ext_hdr->ip6r_nxt;
+    ext_hdr = (struct ip6_ext_hdr *)(((uint8_t *)ext_hdr)
+            + (ext_hdr->ip6r_len + 1) * IP6_EXT_GRANULARITY);
+
+    for (; eth_is_ip6_extension_header_type(curr_ext_hdr_type);) {
+        ext_hdr_prev = ext_hdr;
+        curr_ext_hdr_type = ext_hdr->ip6r_nxt;
+        ext_hdr = (struct ip6_ext_hdr *)(((uint8_t *)ext_hdr)
+                + (ext_hdr->ip6r_len + 1) * IP6_EXT_GRANULARITY);
+    }
+
+    ext_hdr->ip6r_nxt = ext_hdr_prev->ip6r_nxt;
+    ext_hdr_prev->ip6r_nxt = ext_type;
+
+    return ext_hdr;
+}
+
+/* When IP6_FRAGMENT added, first 'id' would be 0x71656d75 */
+static const uint32_t s_first_fragment_identificator = 0x71656d75; /* 'qemu' */
+
 void
-eth_setup_ip4_fragmentation(const void *l2hdr, size_t l2hdr_len,
-                            void *l3hdr, size_t l3hdr_len,
-                            size_t l3payload_len,
+eth_setup_ip_fragmentation(const void *l2hdr, size_t l2hdr_len,
+                            void *l3hdr, size_t *l3hdr_len,
+                            size_t l3hdr_max_len, size_t l3payload_len,
                             size_t frag_offset, bool more_frags)
 {
     const struct iovec l2vec = {
@@ -325,7 +380,9 @@  eth_setup_ip4_fragmentation(const void *l2hdr, size_t l2hdr_len,
         .iov_len = l2hdr_len
     };
 
-    if (eth_get_l3_proto(&l2vec, 1, l2hdr_len) == ETH_P_IP) {
+    uint16_t l3_proto = eth_get_l3_proto(&l2vec, 1, l2hdr_len);
+
+    if (l3_proto == ETH_P_IP) {
         uint16_t orig_flags;
         struct ip_header *iphdr = (struct ip_header *) l3hdr;
         uint16_t frag_off_units = frag_offset / IP_FRAG_UNIT_SIZE;
@@ -337,7 +394,39 @@  eth_setup_ip4_fragmentation(const void *l2hdr, size_t l2hdr_len,
         orig_flags = be16_to_cpu(iphdr->ip_off) & ~(IP_OFFMASK|IP_MF);
         new_ip_off = frag_off_units | orig_flags  | (more_frags ? IP_MF : 0);
         iphdr->ip_off = cpu_to_be16(new_ip_off);
-        iphdr->ip_len = cpu_to_be16(l3payload_len + l3hdr_len);
+        iphdr->ip_len = cpu_to_be16(l3payload_len + *l3hdr_len);
+
+        eth_fix_ip4_checksum(l3hdr, *l3hdr_len);
+    } else if (l3_proto == ETH_P_IPV6) {
+        struct ip6_header *ip6 = (struct ip6_header *) l3hdr;
+
+        struct ip6_ext_hdr_fragment *frag_ext = NULL;
+
+        /* Find frag extension */
+        frag_ext = eth_ip6_find_ext(ip6, IP6_FRAGMENT);
+        if (frag_ext == NULL) {
+            /* No frag extension? Add one */
+            if (*l3hdr_len + sizeof(*frag_ext) > l3hdr_max_len) {
+                /*
+                 * TODO:
+                 * For now, qemu's L3 buffer allocated with NetTxPkt
+                 * and have enough memory(buffer allocated with 65K bytes
+                 * ETH_MAX_IP_DGRAM_LEN = 0xFFFF)
+                 * for additional extensions.
+                 */
+                return; /* TODO: request to reallocate l3hdr */
+            }
+            frag_ext = eth_ip6_add_ext_nonsafe(ip6, IP6_FRAGMENT);
+            *l3hdr_len += sizeof(*frag_ext);
+            static uint32_t s_id = s_first_fragment_identificator;
+            frag_ext->id = cpu_to_be32(s_id);
+            ++s_id;
+        }
+
+        frag_ext->off = cpu_to_be16((frag_offset / IP_FRAG_UNIT_SIZE) << 3
+                | (uint16_t)!!more_frags);
+
+        ip6->ip6_plen = cpu_to_be16(l3payload_len + *l3hdr_len - sizeof(*ip6));
     }
 }