diff mbox series

[ovs-dev,3/3] northd: Avoid matching on ct.dnat flags for load balancers.

Message ID 20210223131953.20993.44943.stgit@dceara.remote.csb
State Changes Requested
Headers show
Series Handle LB VIPs that share backends and make flows offloadable. | expand

Commit Message

Dumitru Ceara Feb. 23, 2021, 1:19 p.m. UTC
Matching on ct.dnat creates openflows that often are not offloadable
to hardware.  ovn-northd uses ct.dnat only for load balancer hairpin
traffic handling and it turns out we don't really need to match on
ct.dnat.

Signed-off-by: Dumitru Ceara <dceara@redhat.com>
---
 northd/ovn-northd.8.xml |   32 +++++++++++++++--------------
 northd/ovn-northd.c     |   52 +++++++++++++++++++++++++++++------------------
 2 files changed, 49 insertions(+), 35 deletions(-)

Comments

Numan Siddique Feb. 24, 2021, 9:17 a.m. UTC | #1
On Tue, Feb 23, 2021 at 6:50 PM Dumitru Ceara <dceara@redhat.com> wrote:
>
> Matching on ct.dnat creates openflows that often are not offloadable
> to hardware.  ovn-northd uses ct.dnat only for load balancer hairpin
> traffic handling and it turns out we don't really need to match on
> ct.dnat.
>
> Signed-off-by: Dumitru Ceara <dceara@redhat.com>

Acked-by: Numan Siddique <numans@ovn.org>

Can you please add tests in ovn-northd for this ? This would make sure
ovn-northd-ddlog (once we have the ddlog patches)
is not out of sync with northd.

Thanks
Numan

> ---
>  northd/ovn-northd.8.xml |   32 +++++++++++++++--------------
>  northd/ovn-northd.c     |   52 +++++++++++++++++++++++++++++------------------
>  2 files changed, 49 insertions(+), 35 deletions(-)
>
> diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml
> index deffe8c..a16937a 100644
> --- a/northd/ovn-northd.8.xml
> +++ b/northd/ovn-northd.8.xml
> @@ -813,19 +813,12 @@
>        <li>
>          If the logical switch has load balancer(s) configured, then a
>          priority-100 flow is added with the match
> -        <code>ip &amp;&amp; ct.trk&amp;&amp; ct.dnat</code> to check if the
> +        <code>ip &amp;&amp; ct.trk</code> to check if the
>          packet needs to be hairpinned (if after load balancing the destination
> -        IP matches the source IP) or not by executing the action
> -        <code>reg0[6] = chk_lb_hairpin();</code> and advances the packet to
> -        the next table.
> -      </li>
> -
> -      <li>
> -        If the logical switch has load balancer(s) configured, then a
> -        priority-90 flow is added with the match <code>ip</code> to check if
> -        the packet is a reply for a hairpinned connection or not by executing
> -        the action <code>reg0[6] = chk_lb_hairpin_reply();</code> and advances
> -        the packet to the next table.
> +        IP matches the source IP) or not by executing the actions
> +        <code>reg0[6] = chk_lb_hairpin();</code> and
> +        <code>reg0[12] = chk_lb_hairpin_reply();</code> and advances the packet
> +        to the next table.
>        </li>
>
>        <li>
> @@ -838,16 +831,25 @@
>        <li>
>           If the logical switch has load balancer(s) configured, then a
>           priority-100 flow is added with the match
> -         <code>ip &amp;&amp; (ct.new || ct.est) &amp;&amp; ct.trk &amp;&amp;
> -         ct.dnat &amp;&amp; reg0[6] == 1</code> which hairpins the traffic by
> +         <code>ip &amp;&amp; ct.new &amp;&amp; ct.trk &amp;&amp;
> +         reg0[6] == 1</code> which hairpins the traffic by
>           NATting source IP to the load balancer VIP by executing the action
>           <code>ct_snat_to_vip</code> and advances the packet to the next table.
>        </li>
>
>        <li>
>           If the logical switch has load balancer(s) configured, then a
> +         priority-100 flow is added with the match
> +         <code>ip &amp;&amp; ct.est &amp;&amp; ct.trk &amp;&amp;
> +         reg0[6] == 1</code> which hairpins the traffic by
> +         NATting source IP to the load balancer VIP by executing the action
> +         <code>ct_snat</code> and advances the packet to the next table.
> +      </li>
> +
> +      <li>
> +         If the logical switch has load balancer(s) configured, then a
>           priority-90 flow is added with the match
> -         <code>ip &amp;&amp; reg0[6] == 1</code> which matches on the replies
> +         <code>ip &amp;&amp; reg0[12] == 1</code> which matches on the replies
>           of hairpinned traffic (i.e., destination IP is VIP,
>           source IP is the backend IP and source L4 port is backend port for L4
>           load balancers) and executes <code>ct_snat</code> and advances the
> diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
> index 18e4cac..f66bdea 100644
> --- a/northd/ovn-northd.c
> +++ b/northd/ovn-northd.c
> @@ -227,6 +227,7 @@ enum ovn_stage {
>  #define REGBIT_ACL_HINT_DROP      "reg0[9]"
>  #define REGBIT_ACL_HINT_BLOCK     "reg0[10]"
>  #define REGBIT_LKUP_FDB           "reg0[11]"
> +#define REGBIT_HAIRPIN_REPLY      "reg0[12]"
>
>  #define REG_ORIG_DIP_IPV4         "reg1"
>  #define REG_ORIG_DIP_IPV6         "xxreg1"
> @@ -266,7 +267,8 @@ enum ovn_stage {
>   *
>   * Logical Switch pipeline:
>   * +----+----------------------------------------------+---+------------------+
> - * | R0 |     REGBIT_{CONNTRACK/DHCP/DNS/HAIRPIN}      |   |                  |
> + * | R0 |     REGBIT_{CONNTRACK/DHCP/DNS}              |   |                  |
> + * |    |     REGBIT_{HAIRPIN/HAIRPIN_REPLY}           | X |                  |
>   * |    | REGBIT_ACL_HINT_{ALLOW_NEW/ALLOW/DROP/BLOCK} | X |                  |
>   * +----+----------------------------------------------+ X |                  |
>   * | R1 |         ORIG_DIP_IPV4 (>= IN_STATEFUL)       | R |                  |
> @@ -6036,39 +6038,49 @@ build_lb_hairpin(struct ovn_datapath *od, struct hmap *lflows)
>      ovn_lflow_add(lflows, od, S_SWITCH_IN_HAIRPIN, 0, "1", "next;");
>
>      if (od->has_lb_vip) {
> -        /* Check if the packet needs to be hairpinned. */
> -        ovn_lflow_add_with_hint(lflows, od, S_SWITCH_IN_PRE_HAIRPIN, 100,
> -                                "ip && ct.trk && ct.dnat",
> -                                REGBIT_HAIRPIN " = chk_lb_hairpin(); next;",
> +        /* Check if the packet needs to be hairpinned.
> +         * Set REGBIT_HAIRPIN in the original direction and
> +         * REGBIT_HAIRPIN_REPLY in the reply direction.
> +         */
> +        ovn_lflow_add_with_hint(
> +            lflows, od, S_SWITCH_IN_PRE_HAIRPIN, 100, "ip && ct.trk",
> +            REGBIT_HAIRPIN " = chk_lb_hairpin(); "
> +            REGBIT_HAIRPIN_REPLY " = chk_lb_hairpin_reply(); "
> +            "next;",
> +            &od->nbs->header_);
> +
> +        /* If packet needs to be hairpinned, snat the src ip with the VIP
> +         * for new sessions. */
> +        ovn_lflow_add_with_hint(lflows, od, S_SWITCH_IN_NAT_HAIRPIN, 100,
> +                                "ip && ct.new && ct.trk"
> +                                " && "REGBIT_HAIRPIN " == 1",
> +                                "ct_snat_to_vip; next;",
>                                  &od->nbs->header_);
>
> -        /* Check if the packet is a reply of hairpinned traffic. */
> -        ovn_lflow_add_with_hint(lflows, od, S_SWITCH_IN_PRE_HAIRPIN, 90, "ip",
> -                                REGBIT_HAIRPIN " = chk_lb_hairpin_reply(); "
> -                                "next;", &od->nbs->header_);
> -
> -        /* If packet needs to be hairpinned, snat the src ip with the VIP. */
> +        /* If packet needs to be hairpinned, for established sessions there
> +         * should already be an SNAT conntrack entry.
> +         */
>          ovn_lflow_add_with_hint(lflows, od, S_SWITCH_IN_NAT_HAIRPIN, 100,
> -                                "ip && (ct.new || ct.est) && ct.trk && ct.dnat"
> +                                "ip && ct.est && ct.trk"
>                                  " && "REGBIT_HAIRPIN " == 1",
> -                                "ct_snat_to_vip; next;",
> +                                "ct_snat;",
>                                  &od->nbs->header_);
>
>          /* For the reply of hairpinned traffic, snat the src ip to the VIP. */
>          ovn_lflow_add_with_hint(lflows, od, S_SWITCH_IN_NAT_HAIRPIN, 90,
> -                                "ip && "REGBIT_HAIRPIN " == 1", "ct_snat;",
> +                                "ip && "REGBIT_HAIRPIN_REPLY " == 1",
> +                                "ct_snat;",
>                                  &od->nbs->header_);
>
>          /* Ingress Hairpin table.
>          * - Priority 1: Packets that were SNAT-ed for hairpinning should be
>          *   looped back (i.e., swap ETH addresses and send back on inport).
>          */
> -        ovn_lflow_add(lflows, od, S_SWITCH_IN_HAIRPIN, 1,
> -                      REGBIT_HAIRPIN " == 1",
> -                      "eth.dst <-> eth.src;"
> -                      "outport = inport;"
> -                      "flags.loopback = 1;"
> -                      "output;");
> +        ovn_lflow_add(
> +            lflows, od, S_SWITCH_IN_HAIRPIN, 1,
> +            "("REGBIT_HAIRPIN " == 1 || " REGBIT_HAIRPIN_REPLY " == 1)",
> +            "eth.dst <-> eth.src; outport = inport; flags.loopback = 1; "
> +            "output;");
>      }
>  }
>
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
Dumitru Ceara Feb. 24, 2021, 3:01 p.m. UTC | #2
On 2/24/21 10:17 AM, Numan Siddique wrote:
> On Tue, Feb 23, 2021 at 6:50 PM Dumitru Ceara <dceara@redhat.com> wrote:
>>
>> Matching on ct.dnat creates openflows that often are not offloadable
>> to hardware.  ovn-northd uses ct.dnat only for load balancer hairpin
>> traffic handling and it turns out we don't really need to match on
>> ct.dnat.
>>
>> Signed-off-by: Dumitru Ceara <dceara@redhat.com>
> 
> Acked-by: Numan Siddique <numans@ovn.org>
> 
> Can you please add tests in ovn-northd for this ? This would make sure
> ovn-northd-ddlog (once we have the ddlog patches)
> is not out of sync with northd.

Good idea, thanks for pointing it out!

+Ben

I think that if my patches go in before the ddlog ones it's still ok to 
accept the ddlog ones in their current state.  I can work on a follow up 
to add ddlog support for the new LB hairpin logical flows soon afterwards.

> 
> Thanks
> Numan
> 

Thanks,
Dumitru
diff mbox series

Patch

diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml
index deffe8c..a16937a 100644
--- a/northd/ovn-northd.8.xml
+++ b/northd/ovn-northd.8.xml
@@ -813,19 +813,12 @@ 
       <li>
         If the logical switch has load balancer(s) configured, then a
         priority-100 flow is added with the match
-        <code>ip &amp;&amp; ct.trk&amp;&amp; ct.dnat</code> to check if the
+        <code>ip &amp;&amp; ct.trk</code> to check if the
         packet needs to be hairpinned (if after load balancing the destination
-        IP matches the source IP) or not by executing the action
-        <code>reg0[6] = chk_lb_hairpin();</code> and advances the packet to
-        the next table.
-      </li>
-
-      <li>
-        If the logical switch has load balancer(s) configured, then a
-        priority-90 flow is added with the match <code>ip</code> to check if
-        the packet is a reply for a hairpinned connection or not by executing
-        the action <code>reg0[6] = chk_lb_hairpin_reply();</code> and advances
-        the packet to the next table.
+        IP matches the source IP) or not by executing the actions
+        <code>reg0[6] = chk_lb_hairpin();</code> and
+        <code>reg0[12] = chk_lb_hairpin_reply();</code> and advances the packet
+        to the next table.
       </li>
 
       <li>
@@ -838,16 +831,25 @@ 
       <li>
          If the logical switch has load balancer(s) configured, then a
          priority-100 flow is added with the match
-         <code>ip &amp;&amp; (ct.new || ct.est) &amp;&amp; ct.trk &amp;&amp;
-         ct.dnat &amp;&amp; reg0[6] == 1</code> which hairpins the traffic by
+         <code>ip &amp;&amp; ct.new &amp;&amp; ct.trk &amp;&amp;
+         reg0[6] == 1</code> which hairpins the traffic by
          NATting source IP to the load balancer VIP by executing the action
          <code>ct_snat_to_vip</code> and advances the packet to the next table.
       </li>
 
       <li>
          If the logical switch has load balancer(s) configured, then a
+         priority-100 flow is added with the match
+         <code>ip &amp;&amp; ct.est &amp;&amp; ct.trk &amp;&amp;
+         reg0[6] == 1</code> which hairpins the traffic by
+         NATting source IP to the load balancer VIP by executing the action
+         <code>ct_snat</code> and advances the packet to the next table.
+      </li>
+
+      <li>
+         If the logical switch has load balancer(s) configured, then a
          priority-90 flow is added with the match
-         <code>ip &amp;&amp; reg0[6] == 1</code> which matches on the replies
+         <code>ip &amp;&amp; reg0[12] == 1</code> which matches on the replies
          of hairpinned traffic (i.e., destination IP is VIP,
          source IP is the backend IP and source L4 port is backend port for L4
          load balancers) and executes <code>ct_snat</code> and advances the
diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index 18e4cac..f66bdea 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -227,6 +227,7 @@  enum ovn_stage {
 #define REGBIT_ACL_HINT_DROP      "reg0[9]"
 #define REGBIT_ACL_HINT_BLOCK     "reg0[10]"
 #define REGBIT_LKUP_FDB           "reg0[11]"
+#define REGBIT_HAIRPIN_REPLY      "reg0[12]"
 
 #define REG_ORIG_DIP_IPV4         "reg1"
 #define REG_ORIG_DIP_IPV6         "xxreg1"
@@ -266,7 +267,8 @@  enum ovn_stage {
  *
  * Logical Switch pipeline:
  * +----+----------------------------------------------+---+------------------+
- * | R0 |     REGBIT_{CONNTRACK/DHCP/DNS/HAIRPIN}      |   |                  |
+ * | R0 |     REGBIT_{CONNTRACK/DHCP/DNS}              |   |                  |
+ * |    |     REGBIT_{HAIRPIN/HAIRPIN_REPLY}           | X |                  |
  * |    | REGBIT_ACL_HINT_{ALLOW_NEW/ALLOW/DROP/BLOCK} | X |                  |
  * +----+----------------------------------------------+ X |                  |
  * | R1 |         ORIG_DIP_IPV4 (>= IN_STATEFUL)       | R |                  |
@@ -6036,39 +6038,49 @@  build_lb_hairpin(struct ovn_datapath *od, struct hmap *lflows)
     ovn_lflow_add(lflows, od, S_SWITCH_IN_HAIRPIN, 0, "1", "next;");
 
     if (od->has_lb_vip) {
-        /* Check if the packet needs to be hairpinned. */
-        ovn_lflow_add_with_hint(lflows, od, S_SWITCH_IN_PRE_HAIRPIN, 100,
-                                "ip && ct.trk && ct.dnat",
-                                REGBIT_HAIRPIN " = chk_lb_hairpin(); next;",
+        /* Check if the packet needs to be hairpinned.
+         * Set REGBIT_HAIRPIN in the original direction and
+         * REGBIT_HAIRPIN_REPLY in the reply direction.
+         */
+        ovn_lflow_add_with_hint(
+            lflows, od, S_SWITCH_IN_PRE_HAIRPIN, 100, "ip && ct.trk",
+            REGBIT_HAIRPIN " = chk_lb_hairpin(); "
+            REGBIT_HAIRPIN_REPLY " = chk_lb_hairpin_reply(); "
+            "next;",
+            &od->nbs->header_);
+
+        /* If packet needs to be hairpinned, snat the src ip with the VIP
+         * for new sessions. */
+        ovn_lflow_add_with_hint(lflows, od, S_SWITCH_IN_NAT_HAIRPIN, 100,
+                                "ip && ct.new && ct.trk"
+                                " && "REGBIT_HAIRPIN " == 1",
+                                "ct_snat_to_vip; next;",
                                 &od->nbs->header_);
 
-        /* Check if the packet is a reply of hairpinned traffic. */
-        ovn_lflow_add_with_hint(lflows, od, S_SWITCH_IN_PRE_HAIRPIN, 90, "ip",
-                                REGBIT_HAIRPIN " = chk_lb_hairpin_reply(); "
-                                "next;", &od->nbs->header_);
-
-        /* If packet needs to be hairpinned, snat the src ip with the VIP. */
+        /* If packet needs to be hairpinned, for established sessions there
+         * should already be an SNAT conntrack entry.
+         */
         ovn_lflow_add_with_hint(lflows, od, S_SWITCH_IN_NAT_HAIRPIN, 100,
-                                "ip && (ct.new || ct.est) && ct.trk && ct.dnat"
+                                "ip && ct.est && ct.trk"
                                 " && "REGBIT_HAIRPIN " == 1",
-                                "ct_snat_to_vip; next;",
+                                "ct_snat;",
                                 &od->nbs->header_);
 
         /* For the reply of hairpinned traffic, snat the src ip to the VIP. */
         ovn_lflow_add_with_hint(lflows, od, S_SWITCH_IN_NAT_HAIRPIN, 90,
-                                "ip && "REGBIT_HAIRPIN " == 1", "ct_snat;",
+                                "ip && "REGBIT_HAIRPIN_REPLY " == 1",
+                                "ct_snat;",
                                 &od->nbs->header_);
 
         /* Ingress Hairpin table.
         * - Priority 1: Packets that were SNAT-ed for hairpinning should be
         *   looped back (i.e., swap ETH addresses and send back on inport).
         */
-        ovn_lflow_add(lflows, od, S_SWITCH_IN_HAIRPIN, 1,
-                      REGBIT_HAIRPIN " == 1",
-                      "eth.dst <-> eth.src;"
-                      "outport = inport;"
-                      "flags.loopback = 1;"
-                      "output;");
+        ovn_lflow_add(
+            lflows, od, S_SWITCH_IN_HAIRPIN, 1,
+            "("REGBIT_HAIRPIN " == 1 || " REGBIT_HAIRPIN_REPLY " == 1)",
+            "eth.dst <-> eth.src; outport = inport; flags.loopback = 1; "
+            "output;");
     }
 }