diff mbox series

[ovs-dev] flow: Consistent VXLAN UDP src ports for fragmented packets

Message ID 20221202125507.12875-1-hemanth.a@acldigital.com
State Changes Requested
Headers show
Series [ovs-dev] flow: Consistent VXLAN UDP src ports for fragmented packets | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test fail github build: failed
ovsrobot/intel-ovs-compilation success test: success

Commit Message

Hemanth Aramadaka Dec. 2, 2022, 12:55 p.m. UTC
Issue:

The src-port for UDP is based on RSS hash in the packet metadata.
In case of packets coming from VM it will be 5-tuple, if available,
otherwise just IP addresses.If the VM fragments a large IP packet
and sends the fragments to ovs, only the first fragment will contain
the L4 header. Therefore, the first fragment and subsequent fragments
get different UDP src ports in the outgoing VXLAN header.This can
lead to fragment re-ordering in the fabric as packet will take
different paths.

Fix:

Intention of this is to avoid fragment packets taking different paths.
For example, due to presence of firewalls, fragment packets will take
different paths and will get dropped.To avoid this we ignore the L4
header during hash calculation only in the case of fragmented packets.

Signed-off-by: Hemanth Aramadaka <hemanth.a@acldigital.com>
---
 lib/flow.c | 22 +++++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

Comments

Ilya Maximets Dec. 2, 2022, 3:07 p.m. UTC | #1
On 12/2/22 13:55, Hemanth Aramadaka via dev wrote:
> Issue:
> 
> The src-port for UDP is based on RSS hash in the packet metadata.
> In case of packets coming from VM it will be 5-tuple, if available,
> otherwise just IP addresses.If the VM fragments a large IP packet
> and sends the fragments to ovs, only the first fragment will contain
> the L4 header. Therefore, the first fragment and subsequent fragments
> get different UDP src ports in the outgoing VXLAN header.This can
> lead to fragment re-ordering in the fabric as packet will take
> different paths.
> 
> Fix:
> 
> Intention of this is to avoid fragment packets taking different paths.
> For example, due to presence of firewalls, fragment packets will take
> different paths and will get dropped.To avoid this we ignore the L4
> header during hash calculation only in the case of fragmented packets.
> 
> Signed-off-by: Hemanth Aramadaka <hemanth.a@acldigital.com>
> ---
>  lib/flow.c | 22 +++++++++++++++++++---
>  1 file changed, 19 insertions(+), 3 deletions(-)
> 
> diff --git a/lib/flow.c b/lib/flow.c
> index c3a3aa3ce..9a65c304e 100644
> --- a/lib/flow.c
> +++ b/lib/flow.c
> @@ -1018,7 +1018,9 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
>                      miniflow_push_be16(mf, ct_tp_src, ct_tp_src);
>                      miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst);
>                      if (dl_type == htons(ETH_TYPE_IP)) {
> -                        dp_packet_update_rss_hash_ipv4_tcp_udp(packet);
> +                        if (!(nw_frag & FLOW_NW_FRAG_MASK)) {
> +                            dp_packet_update_rss_hash_ipv4_tcp_udp(packet);
> +                        }
>                      } else if (dl_type == htons(ETH_TYPE_IPV6)) {
>                          dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
>                      }
> @@ -1033,7 +1035,9 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
>                  miniflow_push_be16(mf, ct_tp_src, ct_tp_src);
>                  miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst);
>                  if (dl_type == htons(ETH_TYPE_IP)) {
> -                    dp_packet_update_rss_hash_ipv4_tcp_udp(packet);
> +                    if (!(nw_frag & FLOW_NW_FRAG_MASK)) {
> +                        dp_packet_update_rss_hash_ipv4_tcp_udp(packet);
> +                    }
>                  } else if (dl_type == htons(ETH_TYPE_IPV6)) {
>                      dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
>                  }
> @@ -2248,7 +2252,7 @@ miniflow_hash_5tuple(const struct miniflow *flow, uint32_t basis)
>  
>      if (flow) {
>          ovs_be16 dl_type = MINIFLOW_GET_BE16(flow, dl_type);
> -        uint8_t nw_proto;
> +        uint8_t nw_proto, nw_frag;
>  
>          if (dl_type == htons(ETH_TYPE_IPV6)) {
>              struct flowmap map = FLOWMAP_EMPTY_INITIALIZER;
> @@ -2270,6 +2274,11 @@ miniflow_hash_5tuple(const struct miniflow *flow, uint32_t basis)
>  
>          nw_proto = MINIFLOW_GET_U8(flow, nw_proto);
>          hash = hash_add(hash, nw_proto);
> +
> +        nw_frag = MINIFLOW_GET_U8(flow, nw_frag);
> +        if (nw_frag & FLOW_NW_FRAG_MASK) {
> +            goto out;
> +        }
>          if (nw_proto != IPPROTO_TCP && nw_proto != IPPROTO_UDP
>              && nw_proto != IPPROTO_SCTP && nw_proto != IPPROTO_ICMP
>              && nw_proto != IPPROTO_ICMPV6) {
> @@ -2292,6 +2301,9 @@ flow_hash_5tuple(const struct flow *flow, uint32_t basis)
>  {
>      BUILD_ASSERT_DECL(FLOW_WC_SEQ == 42);
>      uint32_t hash = basis;
> +    uint8_t nw_frag;
> +    struct miniflow *miniflow;
> +    miniflow = miniflow_create(flow);

This is causing a memory leak on every call:

=================================================================
==210226==ERROR: LeakSanitizer: detected memory leaks

Direct leak of 64 byte(s) in 1 object(s) allocated from:
    #0 0x499ecd in malloc (/home/runner/work/ovs/ovs/openvswitch-3.0.90/_build/sub/vswitchd/ovs-vswitchd+0x499ecd)
    #1 0x8c808d in xmalloc__ /home/runner/work/ovs/ovs/openvswitch-3.0.90/_build/sub/../../lib/util.c:137:15
    #2 0x8c808d in xmalloc /home/runner/work/ovs/ovs/openvswitch-3.0.90/_build/sub/../../lib/util.c:172:12
    #3 0x68ff4c in miniflow_alloc /home/runner/work/ovs/ovs/openvswitch-3.0.90/_build/sub/../../lib/flow.c:3432:28
    #4 0x68ff4c in miniflow_create /home/runner/work/ovs/ovs/openvswitch-3.0.90/_build/sub/../../lib/flow.c:3456:5
    #5 0x68f7c0 in flow_hash_5tuple /home/runner/work/ovs/ovs/openvswitch-3.0.90/_build/sub/../../lib/flow.c:2306:16
    #6 0x63952d in dpif_netdev_execute /home/runner/work/ovs/ovs/openvswitch-3.0.90/_build/sub/../../lib/dpif-netdev.c:4549:32
    #7 0x63952d in dpif_netdev_operate /home/runner/work/ovs/ovs/openvswitch-3.0.90/_build/sub/../../lib/dpif-netdev.c:4605:25
    #8 0x674eb8 in dpif_operate /home/runner/work/ovs/ovs/openvswitch-3.0.90/_build/sub/../../lib/dpif.c:1372:13
    #9 0x6761a9 in dpif_execute /home/runner/work/ovs/ovs/openvswitch-3.0.90/_build/sub/../../lib/dpif.c:1326:9
    #10 0x53e0bd in nxt_resume /home/runner/work/ovs/ovs/openvswitch-3.0.90/_build/sub/../../ofproto/ofproto-dpif.c:5771:5
    #11 0x51f20f in handle_nxt_resume /home/runner/work/ovs/ovs/openvswitch-3.0.90/_build/sub/../../ofproto/ofproto.c:3793:16
    #12 0x51f20f in handle_single_part_openflow /home/runner/work/ovs/ovs/openvswitch-3.0.90/_build/sub/../../ofproto/ofproto.c:8718:16
    #13 0x506c92 in handle_openflow /home/runner/work/ovs/ovs/openvswitch-3.0.90/_build/sub/../../ofproto/ofproto.c:8851:21
    #14 0x5d5a23 in ofconn_run /home/runner/work/ovs/ovs/openvswitch-3.0.90/_build/sub/../../ofproto/connmgr.c:1329:13
    #15 0x5d5a23 in connmgr_run /home/runner/work/ovs/ovs/openvswitch-3.0.90/_build/sub/../../ofproto/connmgr.c:356:9
    #16 0x503d9c in ofproto_run /home/runner/work/ovs/ovs/openvswitch-3.0.90/_build/sub/../../ofproto/ofproto.c:1933:5
    #17 0x4d1a93 in bridge_run__ /home/runner/work/ovs/ovs/openvswitch-3.0.90/_build/sub/../../vswitchd/bridge.c:3211:9
    #18 0x4cce6b in bridge_run /home/runner/work/ovs/ovs/openvswitch-3.0.90/_build/sub/../../vswitchd/bridge.c:3270:5
    #19 0x4eb35e in main /home/runner/work/ovs/ovs/openvswitch-3.0.90/_build/sub/../../vswitchd/ovs-vswitchd.c:129:9
    #20 0x7fecd7c90082 in __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x24082)

SUMMARY: AddressSanitizer: 64 byte(s) leaked in 1 allocation(s).


And you don't need to convert the flow into miniflow
in order to get the information you need.

>  
>      if (flow) {
>  
> @@ -2312,6 +2324,10 @@ flow_hash_5tuple(const struct flow *flow, uint32_t basis)
>          }
>  
>          hash = hash_add(hash, flow->nw_proto);
> +        nw_frag = MINIFLOW_GET_U8(miniflow, nw_frag);
> +        if (nw_frag & FLOW_NW_FRAG_MASK) {
> +            goto out;
> +        }
>          if (flow->nw_proto != IPPROTO_TCP && flow->nw_proto != IPPROTO_UDP
>              && flow->nw_proto != IPPROTO_SCTP && flow->nw_proto != IPPROTO_ICMP
>              && flow->nw_proto != IPPROTO_ICMPV6) {
diff mbox series

Patch

diff --git a/lib/flow.c b/lib/flow.c
index c3a3aa3ce..9a65c304e 100644
--- a/lib/flow.c
+++ b/lib/flow.c
@@ -1018,7 +1018,9 @@  miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
                     miniflow_push_be16(mf, ct_tp_src, ct_tp_src);
                     miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst);
                     if (dl_type == htons(ETH_TYPE_IP)) {
-                        dp_packet_update_rss_hash_ipv4_tcp_udp(packet);
+                        if (!(nw_frag & FLOW_NW_FRAG_MASK)) {
+                            dp_packet_update_rss_hash_ipv4_tcp_udp(packet);
+                        }
                     } else if (dl_type == htons(ETH_TYPE_IPV6)) {
                         dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
                     }
@@ -1033,7 +1035,9 @@  miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
                 miniflow_push_be16(mf, ct_tp_src, ct_tp_src);
                 miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst);
                 if (dl_type == htons(ETH_TYPE_IP)) {
-                    dp_packet_update_rss_hash_ipv4_tcp_udp(packet);
+                    if (!(nw_frag & FLOW_NW_FRAG_MASK)) {
+                        dp_packet_update_rss_hash_ipv4_tcp_udp(packet);
+                    }
                 } else if (dl_type == htons(ETH_TYPE_IPV6)) {
                     dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
                 }
@@ -2248,7 +2252,7 @@  miniflow_hash_5tuple(const struct miniflow *flow, uint32_t basis)
 
     if (flow) {
         ovs_be16 dl_type = MINIFLOW_GET_BE16(flow, dl_type);
-        uint8_t nw_proto;
+        uint8_t nw_proto, nw_frag;
 
         if (dl_type == htons(ETH_TYPE_IPV6)) {
             struct flowmap map = FLOWMAP_EMPTY_INITIALIZER;
@@ -2270,6 +2274,11 @@  miniflow_hash_5tuple(const struct miniflow *flow, uint32_t basis)
 
         nw_proto = MINIFLOW_GET_U8(flow, nw_proto);
         hash = hash_add(hash, nw_proto);
+
+        nw_frag = MINIFLOW_GET_U8(flow, nw_frag);
+        if (nw_frag & FLOW_NW_FRAG_MASK) {
+            goto out;
+        }
         if (nw_proto != IPPROTO_TCP && nw_proto != IPPROTO_UDP
             && nw_proto != IPPROTO_SCTP && nw_proto != IPPROTO_ICMP
             && nw_proto != IPPROTO_ICMPV6) {
@@ -2292,6 +2301,9 @@  flow_hash_5tuple(const struct flow *flow, uint32_t basis)
 {
     BUILD_ASSERT_DECL(FLOW_WC_SEQ == 42);
     uint32_t hash = basis;
+    uint8_t nw_frag;
+    struct miniflow *miniflow;
+    miniflow = miniflow_create(flow);
 
     if (flow) {
 
@@ -2312,6 +2324,10 @@  flow_hash_5tuple(const struct flow *flow, uint32_t basis)
         }
 
         hash = hash_add(hash, flow->nw_proto);
+        nw_frag = MINIFLOW_GET_U8(miniflow, nw_frag);
+        if (nw_frag & FLOW_NW_FRAG_MASK) {
+            goto out;
+        }
         if (flow->nw_proto != IPPROTO_TCP && flow->nw_proto != IPPROTO_UDP
             && flow->nw_proto != IPPROTO_SCTP && flow->nw_proto != IPPROTO_ICMP
             && flow->nw_proto != IPPROTO_ICMPV6) {