diff mbox series

[ovs-dev,v2,3/8] northd: Optimize locking pattern for GW LR NAT flows for LB.

Message ID 20230207114302.1908836-4-i.maximets@ovn.org
State Superseded
Delegated to: Mark Michelson
Headers show
Series northd: Hash locks and lflow creation with dp groups. | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_ovn-kubernetes success github build: passed
ovsrobot/github-robot-_Build_and_Test fail github build: failed

Commit Message

Ilya Maximets Feb. 7, 2023, 11:42 a.m. UTC
build_gw_lrouter_nat_flows_for_lb() function generates or adds one
datapath to a group for two logical flows for one of 3 different flow
types.  This means that if we don't want to lock/unlcok for every
operation on every call, we need to hold 6 different lflow hash locks.
But hash locks can not be nested for thread-safety reasons.

Instead, collecting all the affected datapaths into bitmaps per
flow type and creating all the similar flows together, so we don't
need to hold more than one hash lock at a time.

Using bitmaps, since they require much less memory than arrays of
pointers and generally easier to work with.

This change on its own doesn't provide significant performance benefits,
because constructing extra bitmaps takes extra time.  But it provides
necessary infrastructure for the next commits.

Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
---
 northd/northd.c | 94 ++++++++++++++++++++++++++++++-------------------
 1 file changed, 57 insertions(+), 37 deletions(-)

Comments

Ilya Maximets Feb. 7, 2023, 6:44 p.m. UTC | #1
On 2/7/23 12:42, Ilya Maximets wrote:
> build_gw_lrouter_nat_flows_for_lb() function generates or adds one
> datapath to a group for two logical flows for one of 3 different flow
> types.  This means that if we don't want to lock/unlcok for every
> operation on every call, we need to hold 6 different lflow hash locks.
> But hash locks can not be nested for thread-safety reasons.
> 
> Instead, collecting all the affected datapaths into bitmaps per
> flow type and creating all the similar flows together, so we don't
> need to hold more than one hash lock at a time.
> 
> Using bitmaps, since they require much less memory than arrays of
> pointers and generally easier to work with.
> 
> This change on its own doesn't provide significant performance benefits,
> because constructing extra bitmaps takes extra time.  But it provides
> necessary infrastructure for the next commits.
> 
> Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
> ---
>  northd/northd.c | 94 ++++++++++++++++++++++++++++++-------------------
>  1 file changed, 57 insertions(+), 37 deletions(-)
> 
> diff --git a/northd/northd.c b/northd/northd.c
> index 4ddfc3af3..e06977f7c 100644
> --- a/northd/northd.c
> +++ b/northd/northd.c
> @@ -10514,13 +10514,14 @@ get_force_snat_ip(struct ovn_datapath *od, const char *key_type,
>      return true;
>  }
>  
> -#define LROUTER_NAT_LB_FLOW_INIT(MATCH, ACTION, PRIO) \
> -        (struct lrouter_nat_lb_flow)                  \
> -        { .action = (ACTION), .lflow_ref = NULL,      \
> -          .hash = ovn_logical_flow_hash(              \
> -          ovn_stage_get_table(S_ROUTER_IN_DNAT),      \
> -          ovn_stage_get_pipeline(S_ROUTER_IN_DNAT),   \
> -          (PRIO), ds_cstr(MATCH), (ACTION)) }
> +#define LROUTER_NAT_LB_FLOW_INIT(MATCH, ACTION, PRIO)     \
> +    (struct lrouter_nat_lb_flow) {                        \
> +        .action = (ACTION),                               \
> +        .hash = ovn_logical_flow_hash(                    \
> +            ovn_stage_get_table(S_ROUTER_IN_DNAT),        \
> +            ovn_stage_get_pipeline(S_ROUTER_IN_DNAT),     \
> +            (PRIO), ds_cstr(MATCH), (ACTION)),            \
> +    }
>  
>  enum lrouter_nat_lb_flow_type {
>      LROUTER_NAT_LB_FLOW_NORMAL = 0,
> @@ -10531,8 +10532,6 @@ enum lrouter_nat_lb_flow_type {
>  
>  struct lrouter_nat_lb_flow {
>      char *action;
> -    struct ovn_lflow *lflow_ref;
> -
>      uint32_t hash;
>  };
>  
> @@ -10540,6 +10539,8 @@ struct lrouter_nat_lb_flows_ctx {
>      struct lrouter_nat_lb_flow new[LROUTER_NAT_LB_FLOW_MAX];
>      struct lrouter_nat_lb_flow est[LROUTER_NAT_LB_FLOW_MAX];
>  
> +    unsigned long *dp_bitmap[LROUTER_NAT_LB_FLOW_MAX];
> +
>      struct ds *new_match;
>      struct ds *est_match;
>      struct ds *undnat_match;
> @@ -10607,37 +10608,50 @@ build_distr_lrouter_nat_flows_for_lb(struct lrouter_nat_lb_flows_ctx *ctx,
>  }
>  
>  static void
> -build_gw_lrouter_nat_flows_for_lb(struct lrouter_nat_lb_flows_ctx *ctx,
> -                                  enum lrouter_nat_lb_flow_type type,
> -                                  struct ovn_datapath *od)
> -{
> -    const char *meter = NULL;
> -    struct lrouter_nat_lb_flow *new_flow = &ctx->new[type];
> -    struct lrouter_nat_lb_flow *est_flow = &ctx->est[type];
> -    struct ovs_mutex *hash_lock;
> +build_gw_lrouter_nat_flows_for_lb(struct lrouter_nat_lb_flows_ctx *ctx)
> +{
> +    for (size_t type = 0; type < LROUTER_NAT_LB_FLOW_MAX; type++) {
> +        struct ovn_lflow *new_lflow_ref = NULL, *est_lflow_ref = NULL;
> +        struct lrouter_nat_lb_flow *new_flow = &ctx->new[type];
> +        struct lrouter_nat_lb_flow *est_flow = &ctx->est[type];
> +        struct ovs_mutex *hash_lock;
> +        size_t index;
> +
> +        hash_lock = lflow_hash_lock(ctx->lflows, new_flow->hash);
> +        BITMAP_FOR_EACH_1 (index, n_datapaths, ctx->dp_bitmap[type]) {
> +            struct ovn_datapath *od = datapaths_array[index];
> +            const char *meter = NULL;
> +            struct ovn_lflow *lflow;
> +
> +            if (ctx->reject) {
> +                meter = copp_meter_get(COPP_REJECT, od->nbr->copp,
> +                                       ctx->meter_groups);
> +            }
>  
> -    if (ctx->reject) {
> -        meter = copp_meter_get(COPP_REJECT, od->nbr->copp, ctx->meter_groups);
> -    }
> +            if (meter ||
> +                !ovn_dp_group_add_with_reference(new_lflow_ref, od)) {
> +                lflow = ovn_lflow_add_at_with_hash(ctx->lflows, od,
> +                        S_ROUTER_IN_DNAT, ctx->prio, ds_cstr(ctx->new_match),
> +                        new_flow->action, NULL, meter, &ctx->lb->nlb->header_,
> +                        OVS_SOURCE_LOCATOR, new_flow->hash);
> +                new_lflow_ref = meter ? NULL : lflow;
> +            }
> +        }
> +        lflow_hash_unlock(hash_lock);
>  
> -    hash_lock = lflow_hash_lock(ctx->lflows, new_flow->hash);
> -    if (meter || !ovn_dp_group_add_with_reference(new_flow->lflow_ref, od)) {
> -        struct ovn_lflow *lflow = ovn_lflow_add_at_with_hash(ctx->lflows, od,
> -                S_ROUTER_IN_DNAT, ctx->prio, ds_cstr(ctx->new_match),
> -                new_flow->action, NULL, meter, &ctx->lb->nlb->header_,
> -                OVS_SOURCE_LOCATOR, new_flow->hash);
> -        new_flow->lflow_ref = meter ? NULL : lflow;
> -    }
> -    lflow_hash_unlock(hash_lock);
> +        hash_lock = lflow_hash_lock(ctx->lflows, est_flow->hash);
> +        BITMAP_FOR_EACH_1 (index, n_datapaths, ctx->dp_bitmap[type]) {
> +            struct ovn_datapath *od = datapaths_array[index];
>  
> -    hash_lock = lflow_hash_lock(ctx->lflows, est_flow->hash);
> -    if (!ovn_dp_group_add_with_reference(est_flow->lflow_ref, od)) {
> -        est_flow->lflow_ref = ovn_lflow_add_at_with_hash(ctx->lflows, od,
> -                S_ROUTER_IN_DNAT, ctx->prio, ds_cstr(ctx->est_match),
> -                est_flow->action, NULL, NULL, &ctx->lb->nlb->header_,
> -                OVS_SOURCE_LOCATOR, est_flow->hash);
> +            if (!ovn_dp_group_add_with_reference(est_lflow_ref, od)) {
> +                est_lflow_ref = ovn_lflow_add_at_with_hash(ctx->lflows, od,
> +                        S_ROUTER_IN_DNAT, ctx->prio, ds_cstr(ctx->est_match),
> +                        est_flow->action, NULL, NULL, &ctx->lb->nlb->header_,
> +                        OVS_SOURCE_LOCATOR, est_flow->hash);
> +            }
> +        }
> +        lflow_hash_unlock(hash_lock);
>      }
> -    lflow_hash_unlock(hash_lock);
>  }
>  
>  static void
> @@ -10748,6 +10762,10 @@ build_lrouter_nat_flows_for_lb(struct ovn_lb_vip *lb_vip,
>          LROUTER_NAT_LB_FLOW_INIT(&est_match,
>                                   "flags.force_snat_for_lb = 1; next;", prio);
>  
> +    for (size_t i = 0; i < LROUTER_NAT_LB_FLOW_MAX; i++) {
> +        ctx.dp_bitmap[i] = bitmap_allocate(n_datapaths);
> +    }

Ugh.
I forgot to free these bitmaps in this patch.  They are freed in the
next one.  That's why GHA failed here (failures on other patches seems
to be due to flaky tests).

I can fix the leak in v3, once the patch set is reviewed.

Best regards, Ilya Maximets.

> +
>      struct ovn_datapath **lb_aff_force_snat_router =
>          xcalloc(lb->n_nb_lr, sizeof *lb_aff_force_snat_router);
>      int n_lb_aff_force_snat_router = 0;
> @@ -10773,7 +10791,7 @@ build_lrouter_nat_flows_for_lb(struct ovn_lb_vip *lb_vip,
>          }
>  
>          if (!od->n_l3dgw_ports) {
> -            build_gw_lrouter_nat_flows_for_lb(&ctx, type, od);
> +            bitmap_set1(ctx.dp_bitmap[type], od->index);
>          } else {
>              build_distr_lrouter_nat_flows_for_lb(&ctx, type, od);
>          }
> @@ -10803,6 +10821,8 @@ build_lrouter_nat_flows_for_lb(struct ovn_lb_vip *lb_vip,
>          }
>      }
>  
> +    build_gw_lrouter_nat_flows_for_lb(&ctx);
> +
>      /* LB affinity flows for datapaths where CMS has specified
>       * force_snat_for_lb floag option.
>       */
diff mbox series

Patch

diff --git a/northd/northd.c b/northd/northd.c
index 4ddfc3af3..e06977f7c 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -10514,13 +10514,14 @@  get_force_snat_ip(struct ovn_datapath *od, const char *key_type,
     return true;
 }
 
-#define LROUTER_NAT_LB_FLOW_INIT(MATCH, ACTION, PRIO) \
-        (struct lrouter_nat_lb_flow)                  \
-        { .action = (ACTION), .lflow_ref = NULL,      \
-          .hash = ovn_logical_flow_hash(              \
-          ovn_stage_get_table(S_ROUTER_IN_DNAT),      \
-          ovn_stage_get_pipeline(S_ROUTER_IN_DNAT),   \
-          (PRIO), ds_cstr(MATCH), (ACTION)) }
+#define LROUTER_NAT_LB_FLOW_INIT(MATCH, ACTION, PRIO)     \
+    (struct lrouter_nat_lb_flow) {                        \
+        .action = (ACTION),                               \
+        .hash = ovn_logical_flow_hash(                    \
+            ovn_stage_get_table(S_ROUTER_IN_DNAT),        \
+            ovn_stage_get_pipeline(S_ROUTER_IN_DNAT),     \
+            (PRIO), ds_cstr(MATCH), (ACTION)),            \
+    }
 
 enum lrouter_nat_lb_flow_type {
     LROUTER_NAT_LB_FLOW_NORMAL = 0,
@@ -10531,8 +10532,6 @@  enum lrouter_nat_lb_flow_type {
 
 struct lrouter_nat_lb_flow {
     char *action;
-    struct ovn_lflow *lflow_ref;
-
     uint32_t hash;
 };
 
@@ -10540,6 +10539,8 @@  struct lrouter_nat_lb_flows_ctx {
     struct lrouter_nat_lb_flow new[LROUTER_NAT_LB_FLOW_MAX];
     struct lrouter_nat_lb_flow est[LROUTER_NAT_LB_FLOW_MAX];
 
+    unsigned long *dp_bitmap[LROUTER_NAT_LB_FLOW_MAX];
+
     struct ds *new_match;
     struct ds *est_match;
     struct ds *undnat_match;
@@ -10607,37 +10608,50 @@  build_distr_lrouter_nat_flows_for_lb(struct lrouter_nat_lb_flows_ctx *ctx,
 }
 
 static void
-build_gw_lrouter_nat_flows_for_lb(struct lrouter_nat_lb_flows_ctx *ctx,
-                                  enum lrouter_nat_lb_flow_type type,
-                                  struct ovn_datapath *od)
-{
-    const char *meter = NULL;
-    struct lrouter_nat_lb_flow *new_flow = &ctx->new[type];
-    struct lrouter_nat_lb_flow *est_flow = &ctx->est[type];
-    struct ovs_mutex *hash_lock;
+build_gw_lrouter_nat_flows_for_lb(struct lrouter_nat_lb_flows_ctx *ctx)
+{
+    for (size_t type = 0; type < LROUTER_NAT_LB_FLOW_MAX; type++) {
+        struct ovn_lflow *new_lflow_ref = NULL, *est_lflow_ref = NULL;
+        struct lrouter_nat_lb_flow *new_flow = &ctx->new[type];
+        struct lrouter_nat_lb_flow *est_flow = &ctx->est[type];
+        struct ovs_mutex *hash_lock;
+        size_t index;
+
+        hash_lock = lflow_hash_lock(ctx->lflows, new_flow->hash);
+        BITMAP_FOR_EACH_1 (index, n_datapaths, ctx->dp_bitmap[type]) {
+            struct ovn_datapath *od = datapaths_array[index];
+            const char *meter = NULL;
+            struct ovn_lflow *lflow;
+
+            if (ctx->reject) {
+                meter = copp_meter_get(COPP_REJECT, od->nbr->copp,
+                                       ctx->meter_groups);
+            }
 
-    if (ctx->reject) {
-        meter = copp_meter_get(COPP_REJECT, od->nbr->copp, ctx->meter_groups);
-    }
+            if (meter ||
+                !ovn_dp_group_add_with_reference(new_lflow_ref, od)) {
+                lflow = ovn_lflow_add_at_with_hash(ctx->lflows, od,
+                        S_ROUTER_IN_DNAT, ctx->prio, ds_cstr(ctx->new_match),
+                        new_flow->action, NULL, meter, &ctx->lb->nlb->header_,
+                        OVS_SOURCE_LOCATOR, new_flow->hash);
+                new_lflow_ref = meter ? NULL : lflow;
+            }
+        }
+        lflow_hash_unlock(hash_lock);
 
-    hash_lock = lflow_hash_lock(ctx->lflows, new_flow->hash);
-    if (meter || !ovn_dp_group_add_with_reference(new_flow->lflow_ref, od)) {
-        struct ovn_lflow *lflow = ovn_lflow_add_at_with_hash(ctx->lflows, od,
-                S_ROUTER_IN_DNAT, ctx->prio, ds_cstr(ctx->new_match),
-                new_flow->action, NULL, meter, &ctx->lb->nlb->header_,
-                OVS_SOURCE_LOCATOR, new_flow->hash);
-        new_flow->lflow_ref = meter ? NULL : lflow;
-    }
-    lflow_hash_unlock(hash_lock);
+        hash_lock = lflow_hash_lock(ctx->lflows, est_flow->hash);
+        BITMAP_FOR_EACH_1 (index, n_datapaths, ctx->dp_bitmap[type]) {
+            struct ovn_datapath *od = datapaths_array[index];
 
-    hash_lock = lflow_hash_lock(ctx->lflows, est_flow->hash);
-    if (!ovn_dp_group_add_with_reference(est_flow->lflow_ref, od)) {
-        est_flow->lflow_ref = ovn_lflow_add_at_with_hash(ctx->lflows, od,
-                S_ROUTER_IN_DNAT, ctx->prio, ds_cstr(ctx->est_match),
-                est_flow->action, NULL, NULL, &ctx->lb->nlb->header_,
-                OVS_SOURCE_LOCATOR, est_flow->hash);
+            if (!ovn_dp_group_add_with_reference(est_lflow_ref, od)) {
+                est_lflow_ref = ovn_lflow_add_at_with_hash(ctx->lflows, od,
+                        S_ROUTER_IN_DNAT, ctx->prio, ds_cstr(ctx->est_match),
+                        est_flow->action, NULL, NULL, &ctx->lb->nlb->header_,
+                        OVS_SOURCE_LOCATOR, est_flow->hash);
+            }
+        }
+        lflow_hash_unlock(hash_lock);
     }
-    lflow_hash_unlock(hash_lock);
 }
 
 static void
@@ -10748,6 +10762,10 @@  build_lrouter_nat_flows_for_lb(struct ovn_lb_vip *lb_vip,
         LROUTER_NAT_LB_FLOW_INIT(&est_match,
                                  "flags.force_snat_for_lb = 1; next;", prio);
 
+    for (size_t i = 0; i < LROUTER_NAT_LB_FLOW_MAX; i++) {
+        ctx.dp_bitmap[i] = bitmap_allocate(n_datapaths);
+    }
+
     struct ovn_datapath **lb_aff_force_snat_router =
         xcalloc(lb->n_nb_lr, sizeof *lb_aff_force_snat_router);
     int n_lb_aff_force_snat_router = 0;
@@ -10773,7 +10791,7 @@  build_lrouter_nat_flows_for_lb(struct ovn_lb_vip *lb_vip,
         }
 
         if (!od->n_l3dgw_ports) {
-            build_gw_lrouter_nat_flows_for_lb(&ctx, type, od);
+            bitmap_set1(ctx.dp_bitmap[type], od->index);
         } else {
             build_distr_lrouter_nat_flows_for_lb(&ctx, type, od);
         }
@@ -10803,6 +10821,8 @@  build_lrouter_nat_flows_for_lb(struct ovn_lb_vip *lb_vip,
         }
     }
 
+    build_gw_lrouter_nat_flows_for_lb(&ctx);
+
     /* LB affinity flows for datapaths where CMS has specified
      * force_snat_for_lb floag option.
      */