diff mbox series

[ovs-dev,v8] controller: Prevent race in packet buffering

Message ID 20230516131423.277665-1-amusil@redhat.com
State Accepted
Headers show
Series [ovs-dev,v8] controller: Prevent race in packet buffering | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test fail github build: failed
ovsrobot/github-robot-_ovn-kubernetes fail github build: failed

Commit Message

Ales Musil May 16, 2023, 1:14 p.m. UTC
There was a race within packet buffering that could
result in first packt being dropped. It could happen
under following conditions and topology:
S1 == R1 == public == R2 == S2
SNAT on R1 and DGP on port connecting R1 with public.

1) The GARP is sent for the DGP SNAT
2) The GARP is delayed on R2 because it's multicast
3) The MAC binding is added to SB
4) Some traffic that gets buffered on S2
5) An ARP is sent as consequence of the buffering
6) Response for the ARP is ignored on lflow level,
because the MAC binding already exists
7) The buffered packet is never sent out and times out

In order to prevent the race add additonal lookup for the
buffered packets. When the packet is created do initial
lookup right away as there is a chance that the MAC binding
is already present in database. In any other case that would
prevent the tracked loop to provide correct information do
SB lookup, but only after certain timeout.

Signed-off-by: Ales Musil <amusil@redhat.com>
---
v8: Address comments from Dumitru:
    Remove the "next_lookup_at_ms" as it is not needed.
---
 controller/mac-learn.c | 105 +++++++++++++++++++++++++++++++++++++----
 controller/mac-learn.h |  17 +++++--
 controller/pinctrl.c   |  47 +++++++++---------
 3 files changed, 132 insertions(+), 37 deletions(-)

Comments

Dumitru Ceara May 16, 2023, 3:20 p.m. UTC | #1
On 5/16/23 15:14, Ales Musil wrote:
> There was a race within packet buffering that could
> result in first packt being dropped. It could happen
> under following conditions and topology:
> S1 == R1 == public == R2 == S2
> SNAT on R1 and DGP on port connecting R1 with public.
> 
> 1) The GARP is sent for the DGP SNAT
> 2) The GARP is delayed on R2 because it's multicast
> 3) The MAC binding is added to SB
> 4) Some traffic that gets buffered on S2
> 5) An ARP is sent as consequence of the buffering
> 6) Response for the ARP is ignored on lflow level,
> because the MAC binding already exists
> 7) The buffered packet is never sent out and times out
> 
> In order to prevent the race add additonal lookup for the
> buffered packets. When the packet is created do initial
> lookup right away as there is a chance that the MAC binding
> is already present in database. In any other case that would
> prevent the tracked loop to provide correct information do
> SB lookup, but only after certain timeout.
> 
> Signed-off-by: Ales Musil <amusil@redhat.com>
> ---
> v8: Address comments from Dumitru:
>     Remove the "next_lookup_at_ms" as it is not needed.
> ---

Looks good to me, thanks for addressing all my comments!

Acked-by: Dumitru Ceara <dceara@redhat.com>

Regards,
Dumitru
Mark Michelson May 19, 2023, 5:08 p.m. UTC | #2
Thanks Dumitru and Ales.

I pushed this change to main. I attempted to backport this (as well as 
the previous patches from this series) to branch-23.03 but they did not 
apply cleanly. If you can provide a series that applies to branch-23.03, 
then I can get it backported there and attempt to apply it to older 
branches too.

On 5/16/23 11:20, Dumitru Ceara wrote:
> On 5/16/23 15:14, Ales Musil wrote:
>> There was a race within packet buffering that could
>> result in first packt being dropped. It could happen
>> under following conditions and topology:
>> S1 == R1 == public == R2 == S2
>> SNAT on R1 and DGP on port connecting R1 with public.
>>
>> 1) The GARP is sent for the DGP SNAT
>> 2) The GARP is delayed on R2 because it's multicast
>> 3) The MAC binding is added to SB
>> 4) Some traffic that gets buffered on S2
>> 5) An ARP is sent as consequence of the buffering
>> 6) Response for the ARP is ignored on lflow level,
>> because the MAC binding already exists
>> 7) The buffered packet is never sent out and times out
>>
>> In order to prevent the race add additonal lookup for the
>> buffered packets. When the packet is created do initial
>> lookup right away as there is a chance that the MAC binding
>> is already present in database. In any other case that would
>> prevent the tracked loop to provide correct information do
>> SB lookup, but only after certain timeout.
>>
>> Signed-off-by: Ales Musil <amusil@redhat.com>
>> ---
>> v8: Address comments from Dumitru:
>>      Remove the "next_lookup_at_ms" as it is not needed.
>> ---
> 
> Looks good to me, thanks for addressing all my comments!
> 
> Acked-by: Dumitru Ceara <dceara@redhat.com>
> 
> Regards,
> Dumitru
> 
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
diff mbox series

Patch

diff --git a/controller/mac-learn.c b/controller/mac-learn.c
index 296a70242..071f01b4f 100644
--- a/controller/mac-learn.c
+++ b/controller/mac-learn.c
@@ -23,12 +23,16 @@ 
 #include "lib/packets.h"
 #include "lib/smap.h"
 #include "lib/timeval.h"
+#include "lport.h"
+#include "ovn-sb-idl.h"
 
 VLOG_DEFINE_THIS_MODULE(mac_learn);
 
-#define MAX_FDB_ENTRIES      1000
-#define MAX_BUFFERED_PACKETS 1000
-#define BUFFER_QUEUE_DEPTH   4
+#define MAX_FDB_ENTRIES             1000
+#define MAX_BUFFERED_PACKETS        1000
+#define BUFFER_QUEUE_DEPTH          4
+#define BUFFERED_PACKETS_TIMEOUT_MS 10000
+#define BUFFERED_PACKETS_LOOKUP_MS  100
 
 static size_t keys_ip_hash(uint32_t dp_key, uint32_t port_key,
                            struct in6_addr *ip);
@@ -45,6 +49,13 @@  buffered_packets_find(struct buffered_packets_ctx *ctx, uint64_t dp_key,
                       uint64_t port_key, struct in6_addr *ip, uint32_t hash);
 static void ovn_buffered_packets_remove(struct buffered_packets_ctx *ctx,
                                         struct buffered_packets *bp);
+static void
+buffered_packets_db_lookup(struct buffered_packets *bp,
+                           struct ds *ip, struct eth_addr *mac,
+                           struct ovsdb_idl_index *sbrec_pb_by_key,
+                           struct ovsdb_idl_index *sbrec_dp_by_key,
+                           struct ovsdb_idl_index *sbrec_pb_by_name,
+                           struct ovsdb_idl_index *sbrec_mb_by_lport_ip);
 
 /* mac_binding functions. */
 void
@@ -121,6 +132,23 @@  ovn_mac_binding_timed_out(const struct mac_binding *mb, long long now)
     return now >= mb->timeout_at_ms;
 }
 
+const struct sbrec_mac_binding *
+ovn_mac_binding_lookup(struct ovsdb_idl_index *sbrec_mac_binding_by_lport_ip,
+                       const char *logical_port, const char *ip)
+{
+    struct sbrec_mac_binding *mb =
+        sbrec_mac_binding_index_init_row(sbrec_mac_binding_by_lport_ip);
+    sbrec_mac_binding_index_set_logical_port(mb, logical_port);
+    sbrec_mac_binding_index_set_ip(mb, ip);
+
+    const struct sbrec_mac_binding *retval =
+        sbrec_mac_binding_index_find(sbrec_mac_binding_by_lport_ip, mb);
+
+    sbrec_mac_binding_index_destroy_row(mb);
+
+    return retval;
+}
+
 /* fdb functions. */
 void
 ovn_fdb_init(struct hmap *fdbs)
@@ -212,10 +240,13 @@  ovn_buffered_packets_add(struct buffered_packets_ctx *ctx, uint64_t dp_key,
         bp->ip = ip;
         bp->dp_key = dp_key;
         bp->port_key = port_key;
+        /* Schedule the freshly added buffered packet to do lookup
+         * immediately. */
+        bp->lookup_at_ms = 0;
         ovs_list_init(&bp->queue);
     }
 
-    bp->expire = time_msec() + OVN_BUFFERED_PACKETS_TIMEOUT_MS;
+    bp->expire_at_ms = time_msec() + BUFFERED_PACKETS_TIMEOUT_MS;
 
     return bp;
 }
@@ -235,15 +266,21 @@  ovn_buffered_packets_packet_data_enqueue(struct buffered_packets *bp,
 
 void
 ovn_buffered_packets_ctx_run(struct buffered_packets_ctx *ctx,
-                             const struct mac_bindings_map *recent_mbs)
+                             const struct mac_bindings_map *recent_mbs,
+                             struct ovsdb_idl_index *sbrec_pb_by_key,
+                             struct ovsdb_idl_index *sbrec_dp_by_key,
+                             struct ovsdb_idl_index *sbrec_pb_by_name,
+                             struct ovsdb_idl_index *sbrec_mb_by_lport_ip)
 {
+    struct ds ip = DS_EMPTY_INITIALIZER;
     long long now = time_msec();
 
     struct buffered_packets *bp;
 
     HMAP_FOR_EACH_SAFE (bp, hmap_node, &ctx->buffered_packets) {
+        struct eth_addr mac = eth_addr_zero;
         /* Remove expired buffered packets. */
-        if (now > bp->expire) {
+        if (now > bp->expire_at_ms) {
             ovn_buffered_packets_remove(ctx, bp);
             continue;
         }
@@ -252,20 +289,34 @@  ovn_buffered_packets_ctx_run(struct buffered_packets_ctx *ctx,
         struct mac_binding *mb = mac_binding_find(recent_mbs, bp->dp_key,
                                                   bp->port_key, &bp->ip, hash);
 
-        if (!mb) {
+        if (mb) {
+            mac = mb->mac;
+        } else if (now >= bp->lookup_at_ms) {
+            /* Check if we can do a full lookup. */
+            buffered_packets_db_lookup(bp, &ip, &mac, sbrec_pb_by_key,
+                                       sbrec_dp_by_key, sbrec_pb_by_name,
+                                       sbrec_mb_by_lport_ip);
+            /* Schedule next lookup even if we found the MAC address,
+             * if the address was found this struct will be deleted anyway. */
+            bp->lookup_at_ms = now + BUFFERED_PACKETS_LOOKUP_MS;
+        }
+
+        if (eth_addr_is_zero(mac)) {
             continue;
         }
 
         struct packet_data *pd;
         LIST_FOR_EACH_POP (pd, node, &bp->queue) {
             struct eth_header *eth = dp_packet_data(pd->p);
-            eth->eth_dst = mb->mac;
+            eth->eth_dst = mac;
 
             ovs_list_push_back(&ctx->ready_packets_data, &pd->node);
         }
 
         ovn_buffered_packets_remove(ctx, bp);
     }
+
+    ds_destroy(&ip);
 }
 
 bool
@@ -379,3 +430,41 @@  ovn_buffered_packets_remove(struct buffered_packets_ctx *ctx,
     hmap_remove(&ctx->buffered_packets, &bp->hmap_node);
     free(bp);
 }
+
+static void
+buffered_packets_db_lookup(struct buffered_packets *bp, struct ds *ip,
+                           struct eth_addr *mac,
+                           struct ovsdb_idl_index *sbrec_pb_by_key,
+                           struct ovsdb_idl_index *sbrec_dp_by_key,
+                           struct ovsdb_idl_index *sbrec_pb_by_name,
+                           struct ovsdb_idl_index *sbrec_mb_by_lport_ip)
+{
+    const struct sbrec_port_binding *pb = lport_lookup_by_key(sbrec_dp_by_key,
+                                                              sbrec_pb_by_key,
+                                                              bp->dp_key,
+                                                              bp->port_key);
+    if (!pb) {
+        return;
+    }
+
+    if (!strcmp(pb->type, "chassisredirect")) {
+        const char *dgp_name =
+            smap_get_def(&pb->options, "distributed-port", "");
+        pb = lport_lookup_by_name(sbrec_pb_by_name, dgp_name);
+        if (!pb) {
+            return;
+        }
+    }
+
+    ipv6_format_mapped(&bp->ip, ip);
+    const struct sbrec_mac_binding *smb =
+        ovn_mac_binding_lookup(sbrec_mb_by_lport_ip, pb->logical_port,
+                               ds_cstr_ro(ip));
+    ds_clear(ip);
+
+    if (!smb) {
+        return;
+    }
+
+    eth_addr_from_string(smb->mac, mac);
+}
diff --git a/controller/mac-learn.h b/controller/mac-learn.h
index bc99d09f3..e0fd6a8d1 100644
--- a/controller/mac-learn.h
+++ b/controller/mac-learn.h
@@ -25,6 +25,8 @@ 
 #include "openvswitch/list.h"
 #include "openvswitch/ofpbuf.h"
 
+struct ovsdb_idl_index;
+
 struct mac_binding {
     struct hmap_node hmap_node; /* In a hmap. */
 
@@ -61,6 +63,9 @@  struct mac_binding *ovn_mac_binding_add(struct mac_bindings_map *mac_bindings,
                                         struct in6_addr *ip,
                                         struct eth_addr mac,
                                         uint32_t timeout_ms);
+const struct sbrec_mac_binding *
+ovn_mac_binding_lookup(struct ovsdb_idl_index *sbrec_mac_binding_by_lport_ip,
+                       const char *logical_port, const char *ip);
 
 
 struct fdb_entry {
@@ -82,7 +87,6 @@  struct fdb_entry *ovn_fdb_add(struct hmap *fdbs,
                               uint32_t dp_key, struct eth_addr mac,
                               uint32_t port_key);
 
-#define OVN_BUFFERED_PACKETS_TIMEOUT_MS 10000
 
 struct packet_data {
     struct ovs_list node;
@@ -102,7 +106,10 @@  struct buffered_packets {
     struct ovs_list queue;
 
     /* Timestamp in ms when the buffered packet should expire. */
-    long long int expire;
+    long long int expire_at_ms;
+
+    /* Timestamp in ms when the buffered packet should do full SB lookup.*/
+    long long int lookup_at_ms;
 };
 
 struct buffered_packets_ctx {
@@ -123,7 +130,11 @@  void ovn_buffered_packets_packet_data_enqueue(struct buffered_packets *bp,
                                               struct packet_data *pd);
 void
 ovn_buffered_packets_ctx_run(struct buffered_packets_ctx *ctx,
-                             const struct mac_bindings_map *recent_mbs);
+                             const struct mac_bindings_map *recent_mbs,
+                             struct ovsdb_idl_index *sbrec_pb_by_key,
+                             struct ovsdb_idl_index *sbrec_dp_by_key,
+                             struct ovsdb_idl_index *sbrec_pb_by_name,
+                             struct ovsdb_idl_index *sbrec_mb_by_lport_ip);
 void ovn_buffered_packets_ctx_init(struct buffered_packets_ctx *ctx);
 void ovn_buffered_packets_ctx_destroy(struct buffered_packets_ctx *ctx);
 bool
diff --git a/controller/pinctrl.c b/controller/pinctrl.c
index 1694b0445..52daa8fd3 100644
--- a/controller/pinctrl.c
+++ b/controller/pinctrl.c
@@ -181,8 +181,11 @@  static struct pinctrl pinctrl;
 static void init_buffered_packets_ctx(void);
 static void destroy_buffered_packets_ctx(void);
 static void
-run_buffered_binding(struct ovsdb_idl_index *sbrec_port_binding_by_name,
-                     const struct sbrec_mac_binding_table *mac_binding_table)
+run_buffered_binding(const struct sbrec_mac_binding_table *mac_binding_table,
+                     struct ovsdb_idl_index *sbrec_port_binding_by_key,
+                     struct ovsdb_idl_index *sbrec_datapath_binding_by_key,
+                     struct ovsdb_idl_index *sbrec_port_binding_by_name,
+                     struct ovsdb_idl_index *sbrec_mac_binding_by_lport_ip)
     OVS_REQUIRES(pinctrl_mutex);
 
 static void pinctrl_handle_put_mac_binding(const struct flow *md,
@@ -3500,7 +3503,10 @@  pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
                   sbrec_port_binding_by_key,
                   sbrec_igmp_groups,
                   sbrec_ip_multicast_opts);
-    run_buffered_binding(sbrec_port_binding_by_name, mac_binding_table);
+    run_buffered_binding(mac_binding_table, sbrec_port_binding_by_key,
+                         sbrec_datapath_binding_by_key,
+                         sbrec_port_binding_by_name,
+                         sbrec_mac_binding_by_lport_ip);
     sync_svc_monitors(ovnsb_idl_txn, svc_mon_table, sbrec_port_binding_by_name,
                       chassis);
     bfd_monitor_run(ovnsb_idl_txn, bfd_table, sbrec_port_binding_by_name,
@@ -4134,25 +4140,6 @@  send_mac_binding_buffered_pkts(struct rconn *swconn)
     ovs_list_init(&buffered_packets_ctx.ready_packets_data);
 }
 
-static const struct sbrec_mac_binding *
-mac_binding_lookup(struct ovsdb_idl_index *sbrec_mac_binding_by_lport_ip,
-                   const char *logical_port,
-                   const char *ip)
-{
-    struct sbrec_mac_binding *mb = sbrec_mac_binding_index_init_row(
-        sbrec_mac_binding_by_lport_ip);
-    sbrec_mac_binding_index_set_logical_port(mb, logical_port);
-    sbrec_mac_binding_index_set_ip(mb, ip);
-
-    const struct sbrec_mac_binding *retval
-        = sbrec_mac_binding_index_find(sbrec_mac_binding_by_lport_ip,
-                                       mb);
-
-    sbrec_mac_binding_index_destroy_row(mb);
-
-    return retval;
-}
-
 /* Update or add an IP-MAC binding for 'logical_port'.
  * Caller should make sure that 'ovnsb_idl_txn' is valid. */
 static void
@@ -4168,7 +4155,8 @@  mac_binding_add_to_sb(struct ovsdb_idl_txn *ovnsb_idl_txn,
     snprintf(mac_string, sizeof mac_string, ETH_ADDR_FMT, ETH_ADDR_ARGS(ea));
 
     const struct sbrec_mac_binding *b =
-        mac_binding_lookup(sbrec_mac_binding_by_lport_ip, logical_port, ip);
+        ovn_mac_binding_lookup(sbrec_mac_binding_by_lport_ip,
+                               logical_port, ip);
     if (!b) {
         if (update_only) {
             return;
@@ -4291,8 +4279,11 @@  run_put_mac_bindings(struct ovsdb_idl_txn *ovnsb_idl_txn,
 }
 
 static void
-run_buffered_binding(struct ovsdb_idl_index *sbrec_port_binding_by_name,
-                     const struct sbrec_mac_binding_table *mac_binding_table)
+run_buffered_binding(const struct sbrec_mac_binding_table *mac_binding_table,
+                     struct ovsdb_idl_index *sbrec_port_binding_by_key,
+                     struct ovsdb_idl_index *sbrec_datapath_binding_by_key,
+                     struct ovsdb_idl_index *sbrec_port_binding_by_name,
+                     struct ovsdb_idl_index *sbrec_mac_binding_by_lport_ip)
     OVS_REQUIRES(pinctrl_mutex)
 {
     if (!ovn_buffered_packets_ctx_has_packets(&buffered_packets_ctx)) {
@@ -4341,7 +4332,11 @@  run_buffered_binding(struct ovsdb_idl_index *sbrec_port_binding_by_name,
                             pb->tunnel_key, &ip, mac, 0);
     }
 
-    ovn_buffered_packets_ctx_run(&buffered_packets_ctx, &recent_mbs);
+    ovn_buffered_packets_ctx_run(&buffered_packets_ctx, &recent_mbs,
+                                 sbrec_port_binding_by_key,
+                                 sbrec_datapath_binding_by_key,
+                                 sbrec_port_binding_by_name,
+                                 sbrec_mac_binding_by_lport_ip);
 
     ovn_mac_bindings_map_destroy(&recent_mbs);