diff mbox series

[ovs-dev,3/3] ovn-controller: Support option ovn-allow-vips-share-hairpin-backend.

Message ID 20220705224112.2830175-3-hzhou@ovn.org
State Deferred
Headers show
Series [ovs-dev,1/3] Don't save original dst IP and Port to avoid megaflow unwildcarding. | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test fail github build: failed
ovsrobot/github-robot-_ovn-kubernetes success github build: passed

Commit Message

Han Zhou July 5, 2022, 10:41 p.m. UTC
Add the option for deployments that do not require multiple LB VIPs
sharing same backends for hairpin traffic, so that they can set this
to false to be able to better utilize HW offload capabilities.

Signed-off-by: Han Zhou <hzhou@ovn.org>
---
 controller/lflow.c              | 46 +++++++++++++++++++++++++--------
 controller/lflow.h              |  1 +
 controller/ovn-controller.8.xml | 10 +++++++
 controller/ovn-controller.c     | 13 ++++++++++
 tests/ovn.at                    | 10 +++++++
 5 files changed, 69 insertions(+), 11 deletions(-)
diff mbox series

Patch

diff --git a/controller/lflow.c b/controller/lflow.c
index a44f6d056..35de03a21 100644
--- a/controller/lflow.c
+++ b/controller/lflow.c
@@ -1939,6 +1939,7 @@  add_lb_vip_hairpin_flows(struct ovn_controller_lb *lb,
                          struct ovn_lb_backend *lb_backend,
                          uint8_t lb_proto,
                          bool use_ct_mark,
+                         bool check_ct_dst,
                          struct ovn_desired_flow_table *flow_table)
 {
     uint64_t stub[1024 / 8];
@@ -1949,11 +1950,24 @@  add_lb_vip_hairpin_flows(struct ovn_controller_lb *lb,
     put_load(&value, sizeof value, MFF_LOG_FLAGS,
              MLF_LOOKUP_LB_HAIRPIN_BIT, 1, &ofpacts);
 
-    /* Matching on ct_nw_dst()/ct_ipv6_dst()/ct_tp_dst() requires matching
-     * on ct_state first.
+    /* To detect the hairpin flow accurately, the CT dst IP and ports need to
+     * be matched, to distingiush when more than one VIPs share the same
+     * hairpin backend ip+port.
+     *
+     * Matching on ct_nw_dst()/ct_ipv6_dst()/ct_tp_dst() requires matching
+     * on ct_state == DNAT first.
+     *
+     * However, matching ct_state == DNAT may prevent the flows being
+     * offloaded to HW. The option "check_ct_dst" (default to true) is
+     * provided to disable this check and the related CT fields, so that for
+     * environments that doesn't require support of VIPs sharing same backends
+     * can fully utilize HW offload capability for better performance.
      */
-    uint32_t ct_state = OVS_CS_F_TRACKED | OVS_CS_F_DST_NAT;
-    match_set_ct_state_masked(&hairpin_match, ct_state, ct_state);
+
+    if (check_ct_dst) {
+        uint32_t ct_state = OVS_CS_F_TRACKED | OVS_CS_F_DST_NAT;
+        match_set_ct_state_masked(&hairpin_match, ct_state, ct_state);
+    }
 
     if (IN6_IS_ADDR_V4MAPPED(&lb_vip->vip)) {
         ovs_be32 bip4 = in6_addr_get_mapped_ipv4(&lb_backend->ip);
@@ -1965,7 +1979,9 @@  add_lb_vip_hairpin_flows(struct ovn_controller_lb *lb,
         match_set_dl_type(&hairpin_match, htons(ETH_TYPE_IP));
         match_set_nw_src(&hairpin_match, bip4);
         match_set_nw_dst(&hairpin_match, bip4);
-        match_set_ct_nw_dst(&hairpin_match, vip4);
+        if (check_ct_dst) {
+            match_set_ct_nw_dst(&hairpin_match, vip4);
+        }
 
         add_lb_vip_hairpin_reply_action(NULL, snat_vip4, lb_proto,
                                         lb_backend->port,
@@ -1980,7 +1996,9 @@  add_lb_vip_hairpin_flows(struct ovn_controller_lb *lb,
         match_set_dl_type(&hairpin_match, htons(ETH_TYPE_IPV6));
         match_set_ipv6_src(&hairpin_match, bip6);
         match_set_ipv6_dst(&hairpin_match, bip6);
-        match_set_ct_ipv6_dst(&hairpin_match, &lb_vip->vip);
+        if (check_ct_dst) {
+            match_set_ct_ipv6_dst(&hairpin_match, &lb_vip->vip);
+        }
 
         add_lb_vip_hairpin_reply_action(snat_vip6, 0, lb_proto,
                                         lb_backend->port,
@@ -1991,8 +2009,10 @@  add_lb_vip_hairpin_flows(struct ovn_controller_lb *lb,
     if (lb_backend->port) {
         match_set_nw_proto(&hairpin_match, lb_proto);
         match_set_tp_dst(&hairpin_match, htons(lb_backend->port));
-        match_set_ct_nw_proto(&hairpin_match, lb_proto);
-        match_set_ct_tp_dst(&hairpin_match, htons(lb_vip->vip_port));
+        if (check_ct_dst) {
+            match_set_ct_nw_proto(&hairpin_match, lb_proto);
+            match_set_ct_tp_dst(&hairpin_match, htons(lb_vip->vip_port));
+        }
     }
 
     /* In the original direction, only match on traffic that was already
@@ -2279,7 +2299,7 @@  add_lb_ct_snat_hairpin_flows(struct ovn_controller_lb *lb,
 static void
 consider_lb_hairpin_flows(const struct sbrec_load_balancer *sbrec_lb,
                           const struct hmap *local_datapaths,
-                          bool use_ct_mark,
+                          bool use_ct_mark, bool check_ct_dst,
                           struct ovn_desired_flow_table *flow_table,
                           struct simap *ids)
 {
@@ -2319,7 +2339,7 @@  consider_lb_hairpin_flows(const struct sbrec_load_balancer *sbrec_lb,
             struct ovn_lb_backend *lb_backend = &lb_vip->backends[j];
 
             add_lb_vip_hairpin_flows(lb, lb_vip, lb_backend, lb_proto,
-                                     use_ct_mark, flow_table);
+                                     use_ct_mark, check_ct_dst, flow_table);
         }
     }
 
@@ -2333,6 +2353,7 @@  consider_lb_hairpin_flows(const struct sbrec_load_balancer *sbrec_lb,
 static void
 add_lb_hairpin_flows(const struct sbrec_load_balancer_table *lb_table,
                      const struct hmap *local_datapaths, bool use_ct_mark,
+                     bool check_ct_dst,
                      struct ovn_desired_flow_table *flow_table,
                      struct simap *ids,
                      struct id_pool *pool)
@@ -2356,7 +2377,7 @@  add_lb_hairpin_flows(const struct sbrec_load_balancer_table *lb_table,
             simap_put(ids, lb->name, id);
         }
         consider_lb_hairpin_flows(lb, local_datapaths, use_ct_mark,
-                                  flow_table, ids);
+                                  check_ct_dst, flow_table, ids);
     }
 }
 
@@ -2494,6 +2515,7 @@  lflow_run(struct lflow_ctx_in *l_ctx_in, struct lflow_ctx_out *l_ctx_out)
                        l_ctx_out->flow_table);
     add_lb_hairpin_flows(l_ctx_in->lb_table, l_ctx_in->local_datapaths,
                          l_ctx_in->lb_hairpin_use_ct_mark,
+                         l_ctx_in->lb_hairpin_check_ct_dst,
                          l_ctx_out->flow_table,
                          l_ctx_out->hairpin_lb_ids,
                          l_ctx_out->hairpin_id_pool);
@@ -2658,6 +2680,7 @@  lflow_add_flows_for_datapath(const struct sbrec_datapath_binding *dp,
     for (size_t i = 0; i < n_dp_lbs; i++) {
         consider_lb_hairpin_flows(dp_lbs[i], l_ctx_in->local_datapaths,
                                   l_ctx_in->lb_hairpin_use_ct_mark,
+                                  l_ctx_in->lb_hairpin_check_ct_dst,
                                   l_ctx_out->flow_table,
                                   l_ctx_out->hairpin_lb_ids);
     }
@@ -2789,6 +2812,7 @@  lflow_handle_changed_lbs(struct lflow_ctx_in *l_ctx_in,
                  UUID_ARGS(&lb->header_.uuid));
         consider_lb_hairpin_flows(lb, l_ctx_in->local_datapaths,
                                   l_ctx_in->lb_hairpin_use_ct_mark,
+                                  l_ctx_in->lb_hairpin_check_ct_dst,
                                   l_ctx_out->flow_table,
                                   l_ctx_out->hairpin_lb_ids);
     }
diff --git a/controller/lflow.h b/controller/lflow.h
index 342967917..325ba3837 100644
--- a/controller/lflow.h
+++ b/controller/lflow.h
@@ -161,6 +161,7 @@  struct lflow_ctx_in {
     const struct shash *binding_lports;
     const struct hmap *chassis_tunnels;
     bool lb_hairpin_use_ct_mark;
+    bool lb_hairpin_check_ct_dst;
 };
 
 struct lflow_ctx_out {
diff --git a/controller/ovn-controller.8.xml b/controller/ovn-controller.8.xml
index cb47c9bd1..f93cb1ace 100644
--- a/controller/ovn-controller.8.xml
+++ b/controller/ovn-controller.8.xml
@@ -352,6 +352,16 @@ 
         heplful to pin source outer IP for the tunnel when multiple interfaces
         are used on the host for overlay traffic.
       </dd>
+      <dt><code>external_ids:ovn-allow-vips-share-hairpin-backend</code></dt>
+      <dd>
+        If it is set to false, the behavior is undefined for hairpin traffic
+        when two or more LB VIPs share the same hairpin backend. The default
+        value is true - allowed. However, when set to true, there will be
+        flows that are not HW-offload friendly. So, for deployments that do not
+        require multiple LB VIPs sharing same backends for hairpin traffic, it
+        is recommended to set this to false to be able to better utilize HW
+        offload capabilities.
+      </dd>
     </dl>
 
     <p>
diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
index dacef0204..eb9c4e4eb 100644
--- a/controller/ovn-controller.c
+++ b/controller/ovn-controller.c
@@ -489,6 +489,17 @@  get_ovs_chassis_id(const struct ovsrec_open_vswitch_table *ovs_table)
     return chassis_id;
 }
 
+static bool
+get_allow_vips_share_hairpin_backend(
+    const struct ovsrec_open_vswitch_table *ovs_table)
+{
+    const struct ovsrec_open_vswitch *cfg
+        = ovsrec_open_vswitch_table_first(ovs_table);
+    return cfg ? smap_get_bool(&cfg->external_ids,
+                               "ovn-allow-vips-share-hairpin-backend", true)
+               : true;
+}
+
 static void
 update_ssl_config(const struct ovsrec_ssl_table *ssl_table)
 {
@@ -2576,6 +2587,8 @@  init_lflow_ctx(struct engine_node *node,
     l_ctx_in->binding_lports = &rt_data->lbinding_data.lports;
     l_ctx_in->chassis_tunnels = &non_vif_data->chassis_tunnels;
     l_ctx_in->lb_hairpin_use_ct_mark = n_opts->lb_hairpin_use_ct_mark;
+    l_ctx_in->lb_hairpin_check_ct_dst =
+        get_allow_vips_share_hairpin_backend(ovs_table);
 
     l_ctx_out->flow_table = &fo->flow_table;
     l_ctx_out->group_table = &fo->group_table;
diff --git a/tests/ovn.at b/tests/ovn.at
index 0dd9a1c2e..d84f4a3d1 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -28808,6 +28808,16 @@  AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=70 | ofctl_strip_all | grep -
  table=70, priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ct_nw_proto=17,ct_tp_dst=4040,udp actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88))
 ])
 
+# When ovn-allow-vips-share-hairpin-backend=false, don't match ct state and dst fields in table 68.
+as hv1 ovn-appctl -t ovn-controller vlog/set file:dbg
+as hv1 check ovs-vsctl set open . external_ids:ovn-allow-vips-share-hairpin-backend=false
+check ovn-nbctl --wait=hv sync
+AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=68 | ofctl_strip_all | grep -v NXST], [0], [dnl
+ table=68, priority=100,ct_mark=0x2/0x2,tcp6,ipv6_src=4200::1,ipv6_dst=4200::1,tp_dst=4041 actions=load:0x1->NXM_NX_REG10[[7]],learn(table=69,delete_learned,OXM_OF_METADATA[[]],eth_type=0x86dd,NXM_NX_IPV6_SRC[[]],ipv6_dst=8800::88,nw_proto=6,NXM_OF_TCP_SRC[[]]=NXM_OF_TCP_DST[[]],load:0x1->NXM_NX_REG10[[7]])
+ table=68, priority=100,ct_mark=0x2/0x2,udp,nw_src=42.42.42.1,nw_dst=42.42.42.1,tp_dst=2021 actions=load:0x1->NXM_NX_REG10[[7]],learn(table=69,delete_learned,OXM_OF_METADATA[[]],eth_type=0x800,NXM_OF_IP_SRC[[]],ip_dst=88.88.88.88,nw_proto=17,NXM_OF_UDP_SRC[[]]=NXM_OF_UDP_DST[[]],load:0x1->NXM_NX_REG10[[7]])
+ table=68, priority=100,ct_mark=0x2/0x2,udp6,ipv6_src=4200::1,ipv6_dst=4200::1,tp_dst=2021 actions=load:0x1->NXM_NX_REG10[[7]],learn(table=69,delete_learned,OXM_OF_METADATA[[]],eth_type=0x86dd,NXM_NX_IPV6_SRC[[]],ipv6_dst=8800::88,nw_proto=17,NXM_OF_UDP_SRC[[]]=NXM_OF_UDP_DST[[]],load:0x1->NXM_NX_REG10[[7]])
+])
+
 check ovn-nbctl --wait=hv ls-del sw0
 check ovn-nbctl --wait=hv ls-del sw1