diff mbox series

[ovs-dev] ofproto-dpif-xlate: Reduce stack usage in do_xlate_actions

Message ID 20221021032706.2699168-1-mkp@redhat.com
State Changes Requested
Headers show
Series [ovs-dev] ofproto-dpif-xlate: Reduce stack usage in do_xlate_actions | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test success github build: passed
ovsrobot/intel-ovs-compilation success test: success

Commit Message

Mike Pattrick Oct. 21, 2022, 3:27 a.m. UTC
do_xlate_actions() is used during upcalls, including for recursive
actions including resubmit, clone, and goto_table. Currently it usses a
much larger volume of stack space than is apparent from the code alone
due to compiler optimization and stack management.

GCC's -fstack-usage reports it currently uses 720 bytes, this patch
reduces that to 288. In my testing of a resubmit loop, this patch
increases the number of recursions possible before a segfault by over
20%.

Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2104779
Signed-off-by: Mike Pattrick <mkp@redhat.com>
---
 include/openvswitch/compiler.h |  7 ++++++
 ofproto/ofproto-dpif-xlate.c   | 43 +++++++++++++++++++---------------
 2 files changed, 31 insertions(+), 19 deletions(-)

Comments

Ilya Maximets Nov. 2, 2022, 12:24 p.m. UTC | #1
On 10/21/22 05:27, Mike Pattrick wrote:
> do_xlate_actions() is used during upcalls, including for recursive
> actions including resubmit, clone, and goto_table. Currently it usses a
> much larger volume of stack space than is apparent from the code alone
> due to compiler optimization and stack management.
> 
> GCC's -fstack-usage reports it currently uses 720 bytes, this patch
> reduces that to 288.

Hmm, is that an accurate number?

> In my testing of a resubmit loop, this patch
> increases the number of recursions possible before a segfault by over
> 20%.

I would expect a higher recursion depth increase with the
720 -> 288 stack usage decrease.

> 
> Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2104779
> Signed-off-by: Mike Pattrick <mkp@redhat.com>
> ---
>  include/openvswitch/compiler.h |  7 ++++++
>  ofproto/ofproto-dpif-xlate.c   | 43 +++++++++++++++++++---------------
>  2 files changed, 31 insertions(+), 19 deletions(-)
> 
> diff --git a/include/openvswitch/compiler.h b/include/openvswitch/compiler.h
> index cf009f826..89d36b38a 100644
> --- a/include/openvswitch/compiler.h
> +++ b/include/openvswitch/compiler.h
> @@ -284,5 +284,12 @@
>  #define BUILD_ASSERT_DECL_GCCONLY(EXPR) ((void) 0)
>  #endif
>  
> +/* Used to prevent inlining to aid in reducing stack usage of recursive
> + * functions. */
> +#if defined(__has_attribute) && __has_attribute(noinline)
> +#define OVS_NOINLINE __attribute__((noinline))
> +#else
> +#define OVS_NOINLINE
> +#endif

I'm not sure about manual un-inlining of the functions.
This doesn't scale well.

It's kind of a manual cherry-picking of which functions
to inline and which to not.  This may also cost us some
performance.  And also kind of compiler-dependent.

I'd suggest to move large memory allocations to heap
instead.  We have a plenty of candidates for this.
For example, the huge 'struct flow_tnl flow_tnl' in the
xlate_sample_action().

Best regards, Ilya Maximets.
diff mbox series

Patch

diff --git a/include/openvswitch/compiler.h b/include/openvswitch/compiler.h
index cf009f826..89d36b38a 100644
--- a/include/openvswitch/compiler.h
+++ b/include/openvswitch/compiler.h
@@ -284,5 +284,12 @@ 
 #define BUILD_ASSERT_DECL_GCCONLY(EXPR) ((void) 0)
 #endif
 
+/* Used to prevent inlining to aid in reducing stack usage of recursive
+ * functions. */
+#if defined(__has_attribute) && __has_attribute(noinline)
+#define OVS_NOINLINE __attribute__((noinline))
+#else
+#define OVS_NOINLINE
+#endif
 
 #endif /* compiler.h */
diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
index 3b9b26da1..d7445855e 100644
--- a/ofproto/ofproto-dpif-xlate.c
+++ b/ofproto/ofproto-dpif-xlate.c
@@ -5377,7 +5377,7 @@  xlate_output_action(struct xlate_ctx *ctx, ofp_port_t port,
     }
 }
 
-static void
+static void OVS_NOINLINE
 xlate_output_reg_action(struct xlate_ctx *ctx,
                         const struct ofpact_output_reg *or,
                         bool is_last_action,
@@ -6313,7 +6313,7 @@  put_ct_nat(struct xlate_ctx *ctx)
     nl_msg_end_nested(ctx->odp_actions, nat_offset);
 }
 
-static void
+static void OVS_NOINLINE
 compose_conntrack_action(struct xlate_ctx *ctx, struct ofpact_conntrack *ofc,
                          bool is_last_action)
 {
@@ -6419,7 +6419,7 @@  compose_ct_clear_action(struct xlate_ctx *ctx)
  *
  * It is possible for freezing to happen for both the cases.
  */
-static void
+static void OVS_NOINLINE
 xlate_check_pkt_larger(struct xlate_ctx *ctx,
                        struct ofpact_check_pkt_larger *check_pkt_larger,
                        const struct ofpact *remaining_acts,
@@ -6691,7 +6691,7 @@  rewrite_flow_push_nsh(struct xlate_ctx *ctx,
     return buf;
 }
 
-static void
+static void OVS_NOINLINE
 xlate_generic_encap_action(struct xlate_ctx *ctx,
                            const struct ofpact_encap *encap)
 {
@@ -6839,7 +6839,7 @@  xlate_generic_decap_action(struct xlate_ctx *ctx,
     }
 }
 
-static void
+static void OVS_NOINLINE
 recirc_for_mpls(const struct ofpact *a, struct xlate_ctx *ctx)
 {
     /* No need to recirculate if already exiting. */
@@ -6965,6 +6965,24 @@  xlate_ofpact_unroll_xlate(struct xlate_ctx *ctx,
                  "cookie=%#"PRIx64, a->rule_table_id, a->rule_cookie);
 }
 
+static void
+xlate_trace(struct xlate_ctx *ctx, const struct ofpact *a) {
+    struct ofputil_port_map map = OFPUTIL_PORT_MAP_INITIALIZER(&map);
+
+    if (ctx->xin->names) {
+        struct ofproto_dpif *ofprotop;
+        ofprotop = ofproto_dpif_lookup_by_name(ctx->xbridge->name);
+        ofproto_append_ports_to_map(&map, ofprotop->up.ports);
+    }
+
+    struct ds s = DS_EMPTY_INITIALIZER;
+    struct ofpact_format_params fp = { .s = &s, .port_map = &map };
+    ofpacts_format(a, OFPACT_ALIGN(a->len), &fp);
+    xlate_report(ctx, OFT_ACTION, "%s", ds_cstr(&s));
+    ds_destroy(&s);
+    ofputil_port_map_destroy(&map);
+}
+
 static void
 do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
                  struct xlate_ctx *ctx, bool is_last_action,
@@ -7007,20 +7025,7 @@  do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
         }
 
         if (OVS_UNLIKELY(ctx->xin->trace)) {
-            struct ofputil_port_map map = OFPUTIL_PORT_MAP_INITIALIZER(&map);
-
-            if (ctx->xin->names) {
-                struct ofproto_dpif *ofprotop;
-                ofprotop = ofproto_dpif_lookup_by_name(ctx->xbridge->name);
-                ofproto_append_ports_to_map(&map, ofprotop->up.ports);
-            }
-
-            struct ds s = DS_EMPTY_INITIALIZER;
-            struct ofpact_format_params fp = { .s = &s, .port_map = &map };
-            ofpacts_format(a, OFPACT_ALIGN(a->len), &fp);
-            xlate_report(ctx, OFT_ACTION, "%s", ds_cstr(&s));
-            ds_destroy(&s);
-            ofputil_port_map_destroy(&map);
+            xlate_trace(ctx, a);
         }
 
         switch (a->type) {