[ovs-dev,PATCHv3,1/2] xlate: auto ofproto trace when recursion too deep

Message ID 1519864348-57607-1-git-send-email-u9012063@gmail.com
State New
Headers show
Series
  • [ovs-dev,PATCHv3,1/2] xlate: auto ofproto trace when recursion too deep
Related show

Commit Message

William Tu March 1, 2018, 12:32 a.m.
Usually ofproto/trace is used to debug the flow translation error.
When translation error such as recursion too deep or too many resubmit,
the issue might happen momentary; flows causing the recursion expire
when users try to debug it.  This patch enables the ofproto trace
automatically when recursion is too deep or too many resubmit, by
invoking the translation again, and log the ofproto trace as warnings.
Since the log will be huge, rate limit to one per minute.

VMWare-BZ: #2054659
Signed-off-by: William Tu <u9012063@gmail.com>
---
 ofproto/ofproto-dpif-trace.c  |  7 +------
 ofproto/ofproto-dpif-trace.h  |  6 ++++++
 ofproto/ofproto-dpif-upcall.c | 22 +++++++++++++++++++++-
 tests/ofproto-dpif.at         | 25 +++++++++++++++++++++++++
 4 files changed, 53 insertions(+), 7 deletions(-)

Comments

Ben Pfaff March 6, 2018, 12:20 a.m. | #1
On Wed, Feb 28, 2018 at 04:32:27PM -0800, William Tu wrote:
> Usually ofproto/trace is used to debug the flow translation error.
> When translation error such as recursion too deep or too many resubmit,
> the issue might happen momentary; flows causing the recursion expire
> when users try to debug it.  This patch enables the ofproto trace
> automatically when recursion is too deep or too many resubmit, by
> invoking the translation again, and log the ofproto trace as warnings.
> Since the log will be huge, rate limit to one per minute.
> 
> VMWare-BZ: #2054659
> Signed-off-by: William Tu <u9012063@gmail.com>

Thank you!

I had to fold in the following because of recently added GCC warnings
that caused a diagnostic for 'rl' having the same name as a different
variable in an outer scope.  I also changed the burst size from 5 to 1
because I am super nervous about this change using a lot of space in
lots (after all, we *know* that this is going to produce at least 64
levels of output).

I applied this to master.  Thanks again!

diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c
index 9bba16873d2d..9d8818095add 100644
--- a/ofproto/ofproto-dpif-upcall.c
+++ b/ofproto/ofproto-dpif-upcall.c
@@ -1168,10 +1168,10 @@ upcall_xlate(struct udpif *udpif, struct upcall *upcall,
      * these two error types. */
     if (xerr == XLATE_RECURSION_TOO_DEEP ||
         xerr == XLATE_TOO_MANY_RESUBMITS) {
-        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
+        static struct vlog_rate_limit rll = VLOG_RATE_LIMIT_INIT(1, 1);
 
         /* This is a huge log, so be conservative. */
-        if (!VLOG_DROP_WARN(&rl)) {
+        if (!VLOG_DROP_WARN(&rll)) {
             ds_init(&output);
             ofproto_trace(upcall->ofproto, upcall->flow,
                           upcall->packet, NULL, 0, NULL, &output);

Patch

diff --git a/ofproto/ofproto-dpif-trace.c b/ofproto/ofproto-dpif-trace.c
index 4071cc0ffd65..7211eac8e1a5 100644
--- a/ofproto/ofproto-dpif-trace.c
+++ b/ofproto/ofproto-dpif-trace.c
@@ -23,11 +23,6 @@ 
 #include "ofproto-dpif-xlate.h"
 #include "unixctl.h"
 
-static void ofproto_trace(struct ofproto_dpif *, const struct flow *,
-                          const struct dp_packet *packet,
-                          const struct ofpact[], size_t ofpacts_len,
-                          struct ovs_list *next_ct_states,
-                          struct ds *);
 static void oftrace_node_destroy(struct oftrace_node *);
 
 /* Creates a new oftrace_node, populates it with the given 'type' and a copy of
@@ -661,7 +656,7 @@  ofproto_trace__(struct ofproto_dpif *ofproto, const struct flow *flow,
  *
  * If 'ofpacts' is nonnull then its 'ofpacts_len' bytes specify the actions to
  * trace, otherwise the actions are determined by a flow table lookup. */
-static void
+void
 ofproto_trace(struct ofproto_dpif *ofproto, const struct flow *flow,
               const struct dp_packet *packet,
               const struct ofpact ofpacts[], size_t ofpacts_len,
diff --git a/ofproto/ofproto-dpif-trace.h b/ofproto/ofproto-dpif-trace.h
index 5e51771b19b0..ea6cb3fc01ac 100644
--- a/ofproto/ofproto-dpif-trace.h
+++ b/ofproto/ofproto-dpif-trace.h
@@ -28,6 +28,8 @@ 
  * each action (OFT_ACTION) executed in the table.
  */
 
+#include "openvswitch/dynamic-string.h"
+#include "ofproto/ofproto-dpif.h"
 #include "openvswitch/compiler.h"
 #include "openvswitch/list.h"
 #include "flow.h"
@@ -79,6 +81,10 @@  struct oftrace_next_ct_state {
 };
 
 void ofproto_dpif_trace_init(void);
+void ofproto_trace(struct ofproto_dpif *ofproto, const struct flow *flow,
+              const struct dp_packet *packet,
+              const struct ofpact *, size_t ofpacts_len,
+              struct ovs_list *next_ct_states, struct ds *output);
 
 struct oftrace_node *oftrace_report(struct ovs_list *, enum oftrace_node_type,
                                     const char *text);
diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c
index 526be7760e0a..9bba16873d2d 100644
--- a/ofproto/ofproto-dpif-upcall.c
+++ b/ofproto/ofproto-dpif-upcall.c
@@ -34,6 +34,7 @@ 
 #include "ofproto-dpif-sflow.h"
 #include "ofproto-dpif-xlate.h"
 #include "ofproto-dpif-xlate-cache.h"
+#include "ofproto-dpif-trace.h"
 #include "ovs-rcu.h"
 #include "packets.h"
 #include "openvswitch/poll-loop.h"
@@ -1124,7 +1125,9 @@  upcall_xlate(struct udpif *udpif, struct upcall *upcall,
              struct ofpbuf *odp_actions, struct flow_wildcards *wc)
 {
     struct dpif_flow_stats stats;
+    enum xlate_error xerr;
     struct xlate_in xin;
+    struct ds output;
 
     stats.n_packets = 1;
     stats.n_bytes = dp_packet_size(upcall->packet);
@@ -1159,7 +1162,24 @@  upcall_xlate(struct udpif *udpif, struct upcall *upcall,
     upcall->dump_seq = seq_read(udpif->dump_seq);
     upcall->reval_seq = seq_read(udpif->reval_seq);
 
-    xlate_actions(&xin, &upcall->xout);
+    xerr = xlate_actions(&xin, &upcall->xout);
+
+    /* Translate again and log the ofproto trace for
+     * these two error types. */
+    if (xerr == XLATE_RECURSION_TOO_DEEP ||
+        xerr == XLATE_TOO_MANY_RESUBMITS) {
+        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
+
+        /* This is a huge log, so be conservative. */
+        if (!VLOG_DROP_WARN(&rl)) {
+            ds_init(&output);
+            ofproto_trace(upcall->ofproto, upcall->flow,
+                          upcall->packet, NULL, 0, NULL, &output);
+            VLOG_WARN("%s", ds_cstr(&output));
+            ds_destroy(&output);
+        }
+    }
+
     if (wc) {
         /* Convert the input port wildcard from OFP to ODP format. There's no
          * real way to do this for arbitrary bitmasks since the numbering spaces
diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at
index 600afdda8528..fd78c5d9b593 100644
--- a/tests/ofproto-dpif.at
+++ b/tests/ofproto-dpif.at
@@ -8473,6 +8473,31 @@  AT_CHECK([grep -c 'over max translation depth 64' stdout],
 OVS_VSWITCHD_STOP(["/resubmit actions recursed/d"])
 AT_CLEANUP
 
+dnl Without using ofproto/trace, make sure the
+dnl ofproto trace is still logged
+AT_SETUP([ofproto-dpif - backward resubmit without trace])
+OVS_VSWITCHD_START
+(echo "table=0, actions=resubmit(,66)"
+ for i in `seq 2 66`; do
+    j=`expr $i - 1`
+    echo "table=$i, actions=resubmit(,$j)"
+ done
+ echo "table=1, actions=local") > flows
+AT_CHECK([ovs-ofctl add-flows br0 flows])
+
+AT_CHECK([ovs-appctl netdev-dummy/receive br0 'in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x1234)'], [0], [stdout])
+
+OVS_WAIT_UNTIL([grep 'over max translation' ovs-vswitchd.log])
+
+dnl make sure the full ofproto trace dump is present
+AT_CHECK([grep -c "^ *resubmit" ovs-vswitchd.log],
+  [0], [66
+])
+
+OVS_VSWITCHD_STOP(["/over max translation depth/d
+/ofproto_dpif_upcall/d"])
+AT_CLEANUP
+
 AT_SETUP([ofproto-dpif - exponential resubmit chain])
 OVS_VSWITCHD_START
 add_of_ports br0 1