diff mbox series

[ovs-dev] reconnect: Add Python implementation of received_attempt(), and test.

Message ID 20201222002150.859746-1-blp@ovn.org
State Accepted
Headers show
Series [ovs-dev] reconnect: Add Python implementation of received_attempt(), and test. | expand

Commit Message

Ben Pfaff Dec. 22, 2020, 12:21 a.m. UTC
This follows up on commit 4241d652e465 ("jsonrpc: Avoid disconnecting
prematurely due to long poll intervals."), which implemented the same
thing in C.

Requested-by: Ilya Maximets <i.maximets@ovn.org>
Signed-off-by: Ben Pfaff <blp@ovn.org>
---
 python/ovs/jsonrpc.py   |  5 ++++-
 python/ovs/reconnect.py | 21 ++++++++++++++++++--
 tests/reconnect.at      | 43 +++++++++++++++++++++++++++++++++++------
 tests/test-reconnect.c  | 14 +++++++++++++-
 tests/test-reconnect.py | 13 ++++++++++++-
 5 files changed, 85 insertions(+), 11 deletions(-)

Comments

Ilya Maximets Jan. 5, 2021, 8:57 p.m. UTC | #1
On 12/22/20 1:21 AM, Ben Pfaff wrote:
> This follows up on commit 4241d652e465 ("jsonrpc: Avoid disconnecting
> prematurely due to long poll intervals."), which implemented the same
> thing in C.
> 
> Requested-by: Ilya Maximets <i.maximets@ovn.org>
> Signed-off-by: Ben Pfaff <blp@ovn.org>
> ---
>  python/ovs/jsonrpc.py   |  5 ++++-
>  python/ovs/reconnect.py | 21 ++++++++++++++++++--
>  tests/reconnect.at      | 43 +++++++++++++++++++++++++++++++++++------
>  tests/test-reconnect.c  | 14 +++++++++++++-
>  tests/test-reconnect.py | 13 ++++++++++++-
>  5 files changed, 85 insertions(+), 11 deletions(-)
> 

LGTM, Thanks!

Acked-by: Ilya Maximets <i.maximets@ovn.org>
Ben Pfaff Jan. 7, 2021, 7:06 p.m. UTC | #2
On Tue, Jan 05, 2021 at 09:57:46PM +0100, Ilya Maximets wrote:
> On 12/22/20 1:21 AM, Ben Pfaff wrote:
> > This follows up on commit 4241d652e465 ("jsonrpc: Avoid disconnecting
> > prematurely due to long poll intervals."), which implemented the same
> > thing in C.
> > 
> > Requested-by: Ilya Maximets <i.maximets@ovn.org>
> > Signed-off-by: Ben Pfaff <blp@ovn.org>
> > ---
> >  python/ovs/jsonrpc.py   |  5 ++++-
> >  python/ovs/reconnect.py | 21 ++++++++++++++++++--
> >  tests/reconnect.at      | 43 +++++++++++++++++++++++++++++++++++------
> >  tests/test-reconnect.c  | 14 +++++++++++++-
> >  tests/test-reconnect.py | 13 ++++++++++++-
> >  5 files changed, 85 insertions(+), 11 deletions(-)
> > 
> 
> LGTM, Thanks!
> 
> Acked-by: Ilya Maximets <i.maximets@ovn.org>

Thank you.  I applied this to master.

I didn't backport to 2.13 or 2.14 because I'm not sure about the
tradeoff between fixing theoretical problems in users of the Python
implementation (which I haven't actually seen) versus the possibility of
breaking something (either via Python changes or the test).
Ilya Maximets Jan. 7, 2021, 7:40 p.m. UTC | #3
On 1/7/21 8:06 PM, Ben Pfaff wrote:
> On Tue, Jan 05, 2021 at 09:57:46PM +0100, Ilya Maximets wrote:
>> On 12/22/20 1:21 AM, Ben Pfaff wrote:
>>> This follows up on commit 4241d652e465 ("jsonrpc: Avoid disconnecting
>>> prematurely due to long poll intervals."), which implemented the same
>>> thing in C.
>>>
>>> Requested-by: Ilya Maximets <i.maximets@ovn.org>
>>> Signed-off-by: Ben Pfaff <blp@ovn.org>
>>> ---
>>>  python/ovs/jsonrpc.py   |  5 ++++-
>>>  python/ovs/reconnect.py | 21 ++++++++++++++++++--
>>>  tests/reconnect.at      | 43 +++++++++++++++++++++++++++++++++++------
>>>  tests/test-reconnect.c  | 14 +++++++++++++-
>>>  tests/test-reconnect.py | 13 ++++++++++++-
>>>  5 files changed, 85 insertions(+), 11 deletions(-)
>>>
>>
>> LGTM, Thanks!
>>
>> Acked-by: Ilya Maximets <i.maximets@ovn.org>
> 
> Thank you.  I applied this to master.
> 
> I didn't backport to 2.13 or 2.14 because I'm not sure about the
> tradeoff between fixing theoretical problems in users of the Python
> implementation (which I haven't actually seen) versus the possibility of
> breaking something (either via Python changes or the test).
> 

I agree that having this on master is enough.
Thanks.
diff mbox series

Patch

diff --git a/python/ovs/jsonrpc.py b/python/ovs/jsonrpc.py
index 240e8ccd4ff9..bf32f8c87cb3 100644
--- a/python/ovs/jsonrpc.py
+++ b/python/ovs/jsonrpc.py
@@ -570,13 +570,16 @@  class Session(object):
         if self.rpc is not None:
             received_bytes = self.rpc.get_received_bytes()
             error, msg = self.rpc.recv()
+
+            now = ovs.timeval.msec()
+            self.reconnect.receive_attempted(now)
             if received_bytes != self.rpc.get_received_bytes():
                 # Data was successfully received.
                 #
                 # Previously we only counted receiving a full message as
                 # activity, but with large messages or a slow connection that
                 # policy could time out the session mid-message.
-                self.reconnect.activity(ovs.timeval.msec())
+                self.reconnect.activity(now)
 
             if not error:
                 if msg.type == Message.T_REQUEST and msg.method == "echo":
diff --git a/python/ovs/reconnect.py b/python/ovs/reconnect.py
index 574db7fdd819..c4c6c87e9f39 100644
--- a/python/ovs/reconnect.py
+++ b/python/ovs/reconnect.py
@@ -95,7 +95,10 @@  class Reconnect(object):
         def deadline(fsm):
             if fsm.probe_interval:
                 base = max(fsm.last_activity, fsm.state_entered)
-                return base + fsm.probe_interval
+                expiration = base + fsm.probe_interval
+                if (fsm.last_receive_attempt is None or
+                    fsm.last_receive_attempt >= expiration):
+                    return expiration
             return None
 
         @staticmethod
@@ -113,7 +116,10 @@  class Reconnect(object):
         @staticmethod
         def deadline(fsm):
             if fsm.probe_interval:
-                return fsm.state_entered + fsm.probe_interval
+                expiration = fsm.state_entered + fsm.probe_interval
+                if (fsm.last_receive_attempt is None or
+                    fsm.last_receive_attempt >= expiration):
+                    return expiration
             return None
 
         @staticmethod
@@ -153,6 +159,7 @@  class Reconnect(object):
         self.last_activity = now
         self.last_connected = None
         self.last_disconnected = None
+        self.last_receive_attempt = now
         self.max_tries = None
         self.backoff_free_tries = 0
 
@@ -472,6 +479,16 @@  class Reconnect(object):
             self._transition(now, Reconnect.Active)
         self.last_activity = now
 
+    def receive_attempted(self, now):
+        """Tell 'fsm' that some attempt to receive data on the connection was
+        made at 'now'.  The FSM only allows probe interval timer to expire when
+        some attempt to receive data on the connection was received after the
+        time when it should have expired.  This helps in the case where there's
+        a long delay in the poll loop and then reconnect_run() executes before
+        the code to try to receive anything from the remote runs.  (To disable
+        this feature, pass None for 'now'.)"""
+        self.last_receive_attempt = now
+
     def _transition(self, now, state):
         if self.state == Reconnect.ConnectInProgress:
             self.n_attempted_connections += 1
diff --git a/tests/reconnect.at b/tests/reconnect.at
index 2631658be08a..0f74709f5a7c 100644
--- a/tests/reconnect.at
+++ b/tests/reconnect.at
@@ -38,7 +38,12 @@  RECONNECT_CHECK([quick connect, idle disconnect],
 run
 connected
 
-# Send inactivity probe.
+# Try timeout without noting that we tried to receive.
+# (This does nothing since we never timeout in this case.)
+timeout
+
+# Now disable the receive-attempted feature and timeout again.
+receive-attempted LLONG_MAX
 timeout
 run
 
@@ -61,7 +66,13 @@  connected
   connected
   last connected 0 ms ago, connected 0 ms total
 
-# Send inactivity probe.
+# Try timeout without noting that we tried to receive.
+# (This does nothing since we never timeout in this case.)
+timeout
+  no timeout
+
+# Now disable the receive-attempted feature and timeout again.
+receive-attempted LLONG_MAX
 timeout
   advance 5000 ms
 
@@ -99,7 +110,12 @@  advance 500
 run
 connected
 
-# Send inactivity probe.
+# Try timeout without noting that we tried to receive.
+# (This does nothing since we never timeout in this case.)
+timeout
+
+# Now disable the receive-attempted feature and timeout again.
+receive-attempted LLONG_MAX
 timeout
 run
 
@@ -131,7 +147,13 @@  connected
   connected
   last connected 0 ms ago, connected 0 ms total
 
-# Send inactivity probe.
+# Try timeout without noting that we tried to receive.
+# (This does nothing since we never timeout in this case.)
+timeout
+  no timeout
+
+# Now disable the receive-attempted feature and timeout again.
+receive-attempted LLONG_MAX
 timeout
   advance 5000 ms
 
@@ -357,7 +379,8 @@  connect-failed
 
 ######################################################################
 RECONNECT_CHECK([connections with no data preserve backoff],
-  [enable
+  [receive-attempted LLONG_MAX
+enable
 
 # First connect, then idle timeout kills connection.
 run
@@ -397,6 +420,7 @@  disconnected
 # Back off for 4000 ms.
 timeout
 ], [### t=1000 ###
+receive-attempted LLONG_MAX
 enable
   in BACKOFF for 0 ms (0 ms backoff)
 
@@ -1083,7 +1107,8 @@  timeout
 
 ######################################################################
 RECONNECT_CHECK([max-tries of 1 honored],
-  [set-max-tries 1
+  [receive-attempted LLONG_MAX
+set-max-tries 1
 enable
 
 # Connection succeeds.
@@ -1100,6 +1125,7 @@  run
 disconnected
 ],
   [### t=1000 ###
+receive-attempted LLONG_MAX
 set-max-tries 1
   1 tries left
 enable
@@ -1185,6 +1211,8 @@  activity
 
 # Connection times out.
 timeout
+receive-attempted LLONG_MAX
+timeout
 run
 timeout
 run
@@ -1243,6 +1271,9 @@  activity
   created 1000, last activity 3000, last connected 2000
 
 # Connection times out.
+timeout
+  no timeout
+receive-attempted LLONG_MAX
 timeout
   advance 5000 ms
 
diff --git a/tests/test-reconnect.c b/tests/test-reconnect.c
index bf0463e25c0b..c84bb1cdbfb8 100644
--- a/tests/test-reconnect.c
+++ b/tests/test-reconnect.c
@@ -48,7 +48,6 @@  test_reconnect_main(int argc OVS_UNUSED, char *argv[] OVS_UNUSED)
 
     now = 1000;
     reconnect = reconnect_create(now);
-    reconnect_receive_attempted(reconnect, LLONG_MAX);
     reconnect_set_name(reconnect, "remote");
     reconnect_get_stats(reconnect, now, &prev);
     printf("### t=%d ###\n", now);
@@ -278,6 +277,18 @@  do_listen_error(struct ovs_cmdl_context *ctx)
     reconnect_listen_error(reconnect, now, atoi(ctx->argv[1]));
 }
 
+static void
+do_receive_attempted(struct ovs_cmdl_context *ctx OVS_UNUSED)
+{
+    if (!strcmp(ctx->argv[1], "now")) {
+        reconnect_receive_attempted(reconnect, now);
+    } else if (!strcmp(ctx->argv[1], "LLONG_MAX")) {
+        reconnect_receive_attempted(reconnect, LLONG_MAX);
+    } else {
+        ovs_fatal(0, "%s: bad argument %s", ctx->argv[0], ctx->argv[1]);
+    }
+}
+
 static const struct ovs_cmdl_command all_commands[] = {
     { "enable", NULL, 0, 0, do_enable, OVS_RO },
     { "disable", NULL, 0, 0, do_disable, OVS_RO },
@@ -296,6 +307,7 @@  static const struct ovs_cmdl_command all_commands[] = {
     { "passive", NULL, 0, 0, do_set_passive, OVS_RO },
     { "listening", NULL, 0, 0, do_listening, OVS_RO },
     { "listen-error", NULL, 1, 1, do_listen_error, OVS_RO },
+    { "receive-attempted", NULL, 1, 1, do_receive_attempted, OVS_RO },
     { NULL, NULL, 0, 0, NULL, OVS_RO },
 };
 
diff --git a/tests/test-reconnect.py b/tests/test-reconnect.py
index 6cd052878eb1..f0ad9f9793a9 100644
--- a/tests/test-reconnect.py
+++ b/tests/test-reconnect.py
@@ -163,6 +163,16 @@  def do_listen_error(arg):
     r.listen_error(now, int(arg))
 
 
+def do_receive_attempted(arg):
+    if arg == "now":
+        r.receive_attempted(now)
+    elif arg == "LLONG_MAX":
+        r.receive_attempted(None)
+    else:
+        sys.stderr.write("receive-attempted: bad argument %s\n" % arg)
+        sys.exit(1)
+
+
 def main():
     commands = {
         "enable": do_enable,
@@ -180,7 +190,8 @@  def main():
         "set-backoff-free-tries": do_set_backoff_free_tries,
         "passive": do_set_passive,
         "listening": do_listening,
-        "listen-error": do_listen_error
+        "listen-error": do_listen_error,
+        "receive-attempted": do_receive_attempted
     }
 
     global now