[ovs-dev] lib/flow.c: Don't include ports of first fragments in hash
diff mbox series

Message ID VI1PR07MB45572C240B81C7C5A6C0959BC8160@VI1PR07MB4557.eurprd07.prod.outlook.com
State New
Headers show
Series
  • [ovs-dev] lib/flow.c: Don't include ports of first fragments in hash
Related show

Commit Message

Van Bemmel, Jeroen (Nokia - US) June 5, 2019, 10:36 p.m. UTC
For a series of IP fragments, only the first packet includes the transport
header (TCP/UDP/SCTP) and the src/dst ports. By including these port
numbers in the hash, it may happen that a first fragment hashes to a
different value than subsequent packets, causing different packets from
the same flow to follow different paths. This in turn may result in
out-of-order delivery or failed reassembly. This patch excludes port
numbers from the hash calculation in case of IP fragmentation.

Signed-off-by: Jeroen van Bemmel <jeroen.van_bemmel@nokia.com>
---
         break;
     case NX_HASH_FIELDS_SYMMETRIC_L3L4_UDP:
-        if (is_ip_any(flow) && flow->nw_proto == IPPROTO_UDP) {
+        if (is_ip_any(flow) && flow->nw_proto == IPPROTO_UDP
+            && !(flow->nw_frag & FLOW_NW_FRAG_MASK) ) {
             memset(&wc->masks.tp_src, 0xff, sizeof wc->masks.tp_src);
             memset(&wc->masks.tp_dst, 0xff, sizeof wc->masks.tp_dst);
         }
@@ -2503,7 +2506,8 @@ flow_mask_hash_fields(const struct flow *flow, struct flow_wildcards *wc,
         }
         memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto);
-        if (flow->nw_proto == IPPROTO_TCP || flow->nw_proto == IPPROTO_SCTP) {
+        if ((flow->nw_proto == IPPROTO_TCP || flow->nw_proto == IPPROTO_SCTP)
+             && OVS_LIKELY(!(flow->nw_frag & FLOW_NW_FRAG_MASK))) {
             memset(&wc->masks.tp_src, 0xff, sizeof wc->masks.tp_src);
             memset(&wc->masks.tp_dst, 0xff, sizeof wc->masks.tp_dst);
         }

Comments

Darrell Ball June 6, 2019, 1:32 a.m. UTC | #1
There is a switch config, called 'frag_mode'; the default is 'normal' where
tcp ports, udp ports
and ICMP type/code are set to 0 for all fragments.

http://www.openvswitch.org/support/dist-docs/ovs-ofctl.8.pdf

This is referenced during datapath flow translation right now.
It might be worth investigating whether this config can be referenced and
folded in earlier.




On Wed, Jun 5, 2019 at 3:36 PM Van Bemmel, Jeroen (Nokia - US) <
jeroen.van_bemmel@nokia.com> wrote:

> For a series of IP fragments, only the first packet includes the transport
> header (TCP/UDP/SCTP) and the src/dst ports. By including these port
> numbers in the hash, it may happen that a first fragment hashes to a
> different value than subsequent packets, causing different packets from
> the same flow to follow different paths. This in turn may result in
> out-of-order delivery or failed reassembly. This patch excludes port
> numbers from the hash calculation in case of IP fragmentation.
>
> Signed-off-by: Jeroen van Bemmel <jeroen.van_bemmel@nokia.com>
> ---
> diff --git a/lib/flow.c b/lib/flow.c
> index f39b57f5b..0ff20a87a 100644
> --- a/lib/flow.c
> +++ b/lib/flow.c
> @@ -2370,8 +2370,10 @@ flow_hash_symmetric_l3l4(const struct flow *flow,
> uint32_t basis,
>      }
>      hash = hash_add(hash, flow->nw_proto);
> -    if (flow->nw_proto == IPPROTO_TCP || flow->nw_proto == IPPROTO_SCTP ||
> -         (inc_udp_ports && flow->nw_proto == IPPROTO_UDP)) {
> +    if ((flow->nw_proto == IPPROTO_TCP || flow->nw_proto == IPPROTO_SCTP
> ||
> +        (inc_udp_ports && flow->nw_proto == IPPROTO_UDP))
> +        /* For first fragments, don't include ports in hash */
> +        && !(flow->nw_frag & FLOW_NW_FRAG_MASK) ) {
>          hash = hash_add(hash,
>                          (OVS_FORCE uint16_t) (flow->tp_src ^
> flow->tp_dst));
>      }
> @@ -2486,7 +2488,8 @@ flow_mask_hash_fields(const struct flow *flow,
> struct flow_wildcards *wc,
>          break;
>      case NX_HASH_FIELDS_SYMMETRIC_L3L4_UDP:
> -        if (is_ip_any(flow) && flow->nw_proto == IPPROTO_UDP) {
> +        if (is_ip_any(flow) && flow->nw_proto == IPPROTO_UDP
> +            && !(flow->nw_frag & FLOW_NW_FRAG_MASK) ) {
>              memset(&wc->masks.tp_src, 0xff, sizeof wc->masks.tp_src);
>              memset(&wc->masks.tp_dst, 0xff, sizeof wc->masks.tp_dst);
>          }
> @@ -2503,7 +2506,8 @@ flow_mask_hash_fields(const struct flow *flow,
> struct flow_wildcards *wc,
>          }
>          memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto);
> -        if (flow->nw_proto == IPPROTO_TCP || flow->nw_proto ==
> IPPROTO_SCTP) {
> +        if ((flow->nw_proto == IPPROTO_TCP || flow->nw_proto ==
> IPPROTO_SCTP)
> +             && OVS_LIKELY(!(flow->nw_frag & FLOW_NW_FRAG_MASK))) {
>              memset(&wc->masks.tp_src, 0xff, sizeof wc->masks.tp_src);
>              memset(&wc->masks.tp_dst, 0xff, sizeof wc->masks.tp_dst);
>          }
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
Van Bemmel, Jeroen (Nokia - US) June 6, 2019, 2:10 a.m. UTC | #2
In ofproto/ofproto-dpif.c function rule_dpif_lookup_from_table the first fragment ports are _temporarily_ set to 0 for the purpose of flow lookup.
However, the ports are then restored, and multipath code sees non-0 ports for the first fragment

We could skip the saving/restore of the original ports, and clear the ports permanently in ‘normal’ frag_mode. It does seem inconsistent to consider the ports 0 for flow lookup, but then use non-0 ports for the first fragment for OVS action processing.

Regards,
Jeroen

From: Darrell Ball <dlu998@gmail.com>
Sent: Wednesday, June 5, 2019 8:33 PM
To: Van Bemmel, Jeroen (Nokia - US) <jeroen.van_bemmel@nokia.com>
Cc: ovs-dev@openvswitch.org
Subject: Re: [ovs-dev] [PATCH] lib/flow.c: Don't include ports of first fragments in hash

There is a switch config, called 'frag_mode'; the default is 'normal' where tcp ports, udp ports
and ICMP type/code are set to 0 for all fragments.

http://www.openvswitch.org/support/dist-docs/ovs-ofctl.8.pdf

This is referenced during datapath flow translation right now.
It might be worth investigating whether this config can be referenced and folded in earlier.




On Wed, Jun 5, 2019 at 3:36 PM Van Bemmel, Jeroen (Nokia - US) <jeroen.van_bemmel@nokia.com<mailto:jeroen.van_bemmel@nokia.com>> wrote:
For a series of IP fragments, only the first packet includes the transport
header (TCP/UDP/SCTP) and the src/dst ports. By including these port
numbers in the hash, it may happen that a first fragment hashes to a
different value than subsequent packets, causing different packets from
the same flow to follow different paths. This in turn may result in
out-of-order delivery or failed reassembly. This patch excludes port
numbers from the hash calculation in case of IP fragmentation.

Signed-off-by: Jeroen van Bemmel <jeroen.van_bemmel@nokia.com<mailto:jeroen.van_bemmel@nokia.com>>
---
diff --git a/lib/flow.c b/lib/flow.c
index f39b57f5b..0ff20a87a 100644
--- a/lib/flow.c
+++ b/lib/flow.c
@@ -2370,8 +2370,10 @@ flow_hash_symmetric_l3l4(const struct flow *flow, uint32_t basis,
     }
     hash = hash_add(hash, flow->nw_proto);
-    if (flow->nw_proto == IPPROTO_TCP || flow->nw_proto == IPPROTO_SCTP ||
-         (inc_udp_ports && flow->nw_proto == IPPROTO_UDP)) {
+    if ((flow->nw_proto == IPPROTO_TCP || flow->nw_proto == IPPROTO_SCTP ||
+        (inc_udp_ports && flow->nw_proto == IPPROTO_UDP))
+        /* For first fragments, don't include ports in hash */
+        && !(flow->nw_frag & FLOW_NW_FRAG_MASK) ) {
         hash = hash_add(hash,
                         (OVS_FORCE uint16_t) (flow->tp_src ^ flow->tp_dst));
     }
@@ -2486,7 +2488,8 @@ flow_mask_hash_fields(const struct flow *flow, struct flow_wildcards *wc,
         break;
     case NX_HASH_FIELDS_SYMMETRIC_L3L4_UDP:
-        if (is_ip_any(flow) && flow->nw_proto == IPPROTO_UDP) {
+        if (is_ip_any(flow) && flow->nw_proto == IPPROTO_UDP
+            && !(flow->nw_frag & FLOW_NW_FRAG_MASK) ) {
             memset(&wc->masks.tp_src, 0xff, sizeof wc->masks.tp_src);
             memset(&wc->masks.tp_dst, 0xff, sizeof wc->masks.tp_dst);
         }
@@ -2503,7 +2506,8 @@ flow_mask_hash_fields(const struct flow *flow, struct flow_wildcards *wc,
         }
         memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto);
-        if (flow->nw_proto == IPPROTO_TCP || flow->nw_proto == IPPROTO_SCTP) {
+        if ((flow->nw_proto == IPPROTO_TCP || flow->nw_proto == IPPROTO_SCTP)
+             && OVS_LIKELY(!(flow->nw_frag & FLOW_NW_FRAG_MASK))) {
             memset(&wc->masks.tp_src, 0xff, sizeof wc->masks.tp_src);
             memset(&wc->masks.tp_dst, 0xff, sizeof wc->masks.tp_dst);
         }
Ben Pfaff June 7, 2019, 6:28 p.m. UTC | #3
On Wed, Jun 05, 2019 at 10:36:33PM +0000, Van Bemmel, Jeroen (Nokia - US) wrote:
> For a series of IP fragments, only the first packet includes the transport
> header (TCP/UDP/SCTP) and the src/dst ports. By including these port
> numbers in the hash, it may happen that a first fragment hashes to a
> different value than subsequent packets, causing different packets from
> the same flow to follow different paths. This in turn may result in
> out-of-order delivery or failed reassembly. This patch excludes port
> numbers from the hash calculation in case of IP fragmentation.
> 
> Signed-off-by: Jeroen van Bemmel <jeroen.van_bemmel@nokia.com>

I applied this to master, thanks!

Patch
diff mbox series

diff --git a/lib/flow.c b/lib/flow.c
index f39b57f5b..0ff20a87a 100644
--- a/lib/flow.c
+++ b/lib/flow.c
@@ -2370,8 +2370,10 @@  flow_hash_symmetric_l3l4(const struct flow *flow, uint32_t basis,
     }
     hash = hash_add(hash, flow->nw_proto);
-    if (flow->nw_proto == IPPROTO_TCP || flow->nw_proto == IPPROTO_SCTP ||
-         (inc_udp_ports && flow->nw_proto == IPPROTO_UDP)) {
+    if ((flow->nw_proto == IPPROTO_TCP || flow->nw_proto == IPPROTO_SCTP ||
+        (inc_udp_ports && flow->nw_proto == IPPROTO_UDP))
+        /* For first fragments, don't include ports in hash */
+        && !(flow->nw_frag & FLOW_NW_FRAG_MASK) ) {
         hash = hash_add(hash,
                         (OVS_FORCE uint16_t) (flow->tp_src ^ flow->tp_dst));
     }
@@ -2486,7 +2488,8 @@  flow_mask_hash_fields(const struct flow *flow, struct flow_wildcards *wc,