{"id":815894,"url":"http://patchwork.ozlabs.org/api/patches/815894/?format=json","web_url":"http://patchwork.ozlabs.org/project/openvswitch/patch/20170919220125.32535-42-blp@ovn.org/","project":{"id":47,"url":"http://patchwork.ozlabs.org/api/projects/47/?format=json","name":"Open vSwitch","link_name":"openvswitch","list_id":"ovs-dev.openvswitch.org","list_email":"ovs-dev@openvswitch.org","web_url":"http://openvswitch.org/","scm_url":"git@github.com:openvswitch/ovs.git","webscm_url":"https://github.com/openvswitch/ovs","list_archive_url":"","list_archive_url_format":"","commit_url_format":""},"msgid":"<20170919220125.32535-42-blp@ovn.org>","list_archive_url":null,"date":"2017-09-19T22:01:14","name":"[ovs-dev,RFC,41/52] reconnect: Add ability to do a number of retries without backoff.","commit_ref":null,"pull_url":null,"state":"rfc","archived":false,"hash":"e00caeee201252cbe0aa0bb6e32cee819a6796c6","submitter":{"id":67603,"url":"http://patchwork.ozlabs.org/api/people/67603/?format=json","name":"Ben Pfaff","email":"blp@ovn.org"},"delegate":null,"mbox":"http://patchwork.ozlabs.org/project/openvswitch/patch/20170919220125.32535-42-blp@ovn.org/mbox/","series":[{"id":3975,"url":"http://patchwork.ozlabs.org/api/series/3975/?format=json","web_url":"http://patchwork.ozlabs.org/project/openvswitch/list/?series=3975","date":"2017-09-19T22:00:34","name":"clustering implementation","version":1,"mbox":"http://patchwork.ozlabs.org/series/3975/mbox/"}],"comments":"http://patchwork.ozlabs.org/api/patches/815894/comments/","check":"pending","checks":"http://patchwork.ozlabs.org/api/patches/815894/checks/","tags":{},"related":[],"headers":{"Return-Path":"<ovs-dev-bounces@openvswitch.org>","X-Original-To":["incoming@patchwork.ozlabs.org","dev@openvswitch.org"],"Delivered-To":["patchwork-incoming@bilbo.ozlabs.org","ovs-dev@mail.linuxfoundation.org"],"Authentication-Results":"ozlabs.org;\n\tspf=pass (mailfrom) smtp.mailfrom=openvswitch.org\n\t(client-ip=140.211.169.12; helo=mail.linuxfoundation.org;\n\tenvelope-from=ovs-dev-bounces@openvswitch.org;\n\treceiver=<UNKNOWN>)","Received":["from mail.linuxfoundation.org (mail.linuxfoundation.org\n\t[140.211.169.12])\n\t(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256\n\tbits)) (No client certificate requested)\n\tby ozlabs.org (Postfix) with ESMTPS id 3xxcks3pdZz9sBW\n\tfor <incoming@patchwork.ozlabs.org>;\n\tWed, 20 Sep 2017 08:20:21 +1000 (AEST)","from mail.linux-foundation.org (localhost [127.0.0.1])\n\tby mail.linuxfoundation.org (Postfix) with ESMTP id E70ABD07;\n\tTue, 19 Sep 2017 22:02:44 +0000 (UTC)","from smtp1.linuxfoundation.org (smtp1.linux-foundation.org\n\t[172.17.192.35])\n\tby mail.linuxfoundation.org (Postfix) with ESMTPS id 00309D05\n\tfor <dev@openvswitch.org>; Tue, 19 Sep 2017 22:02:42 +0000 (UTC)","from relay4-d.mail.gandi.net (relay4-d.mail.gandi.net\n\t[217.70.183.196])\n\tby smtp1.linuxfoundation.org (Postfix) with ESMTPS id D796220D\n\tfor <dev@openvswitch.org>; Tue, 19 Sep 2017 22:02:40 +0000 (UTC)","from sigabrt.benpfaff.org (unknown [208.91.2.3])\n\t(Authenticated sender: blp@ovn.org)\n\tby relay4-d.mail.gandi.net (Postfix) with ESMTPSA id AC7A1172094;\n\tWed, 20 Sep 2017 00:02:38 +0200 (CEST)"],"X-Greylist":"domain auto-whitelisted by SQLgrey-1.7.6","X-Originating-IP":"208.91.2.3","From":"Ben Pfaff <blp@ovn.org>","To":"dev@openvswitch.org","Date":"Tue, 19 Sep 2017 15:01:14 -0700","Message-Id":"<20170919220125.32535-42-blp@ovn.org>","X-Mailer":"git-send-email 2.10.2","In-Reply-To":"<20170919220125.32535-1-blp@ovn.org>","References":"<20170919220125.32535-1-blp@ovn.org>","X-Spam-Status":"No, score=-0.7 required=5.0 tests=RCVD_IN_DNSWL_LOW\n\tautolearn=disabled version=3.3.1","X-Spam-Checker-Version":"SpamAssassin 3.3.1 (2010-03-16) on\n\tsmtp1.linux-foundation.org","Cc":"Ben Pfaff <blp@ovn.org>","Subject":"[ovs-dev] [PATCH RFC 41/52] reconnect: Add ability to do a number\n\tof retries without backoff.","X-BeenThere":"ovs-dev@openvswitch.org","X-Mailman-Version":"2.1.12","Precedence":"list","List-Id":"<ovs-dev.openvswitch.org>","List-Unsubscribe":"<https://mail.openvswitch.org/mailman/options/ovs-dev>,\n\t<mailto:ovs-dev-request@openvswitch.org?subject=unsubscribe>","List-Archive":"<http://mail.openvswitch.org/pipermail/ovs-dev/>","List-Post":"<mailto:ovs-dev@openvswitch.org>","List-Help":"<mailto:ovs-dev-request@openvswitch.org?subject=help>","List-Subscribe":"<https://mail.openvswitch.org/mailman/listinfo/ovs-dev>,\n\t<mailto:ovs-dev-request@openvswitch.org?subject=subscribe>","MIME-Version":"1.0","Content-Type":"text/plain; charset=\"us-ascii\"","Content-Transfer-Encoding":"7bit","Sender":"ovs-dev-bounces@openvswitch.org","Errors-To":"ovs-dev-bounces@openvswitch.org"},"content":"This is aimed at an upcoming database clustering implementation, where it's\ndesirable to try all of the cluster members quickly before backing off to\nretry them again in sequence.\n\nSigned-off-by: Ben Pfaff <blp@ovn.org>\n---\n lib/reconnect.c         | 52 ++++++++++++++++++++++++++++++---------------\n lib/reconnect.h         |  3 +++\n python/ovs/reconnect.py | 53 ++++++++++++++++++++++++++++++----------------\n tests/reconnect.at      | 56 ++++++++++++++++++++++++++++++++++++++++++++++++-\n tests/test-reconnect.c  |  8 +++++++\n tests/test-reconnect.py |  5 +++++\n 6 files changed, 141 insertions(+), 36 deletions(-)","diff":"diff --git a/lib/reconnect.c b/lib/reconnect.c\nindex 471fb7fc8d61..f91b4c09ae5d 100644\n--- a/lib/reconnect.c\n+++ b/lib/reconnect.c\n@@ -62,6 +62,7 @@ struct reconnect {\n     long long int last_connected;\n     long long int last_disconnected;\n     unsigned int max_tries;\n+    unsigned int backoff_free_tries;\n \n     /* These values are simply for statistics reporting, not otherwise used\n      * directly by anything internal. */\n@@ -206,6 +207,15 @@ reconnect_get_max_tries(struct reconnect *fsm)\n     return fsm->max_tries;\n }\n \n+/* Sets the number of connection attempts that will be made without backoff to\n+ * 'backoff_free_tries'.  Values 0 and 1 both represent a single attempt. */\n+void\n+reconnect_set_backoff_free_tries(struct reconnect *fsm,\n+                                 unsigned int backoff_free_tries)\n+{\n+    fsm->backoff_free_tries = backoff_free_tries;\n+}\n+\n /* Configures the backoff parameters for 'fsm'.  'min_backoff' is the minimum\n  * number of milliseconds, and 'max_backoff' is the maximum, between connection\n  * attempts.  The current backoff is also the duration that 'fsm' is willing to\n@@ -346,7 +356,7 @@ reconnect_disconnected(struct reconnect *fsm, long long int now, int error)\n                 VLOG(fsm->info, \"%s: error listening for connections\",\n                      fsm->name);\n             }\n-        } else {\n+        } else if (fsm->backoff < fsm->max_backoff) {\n             const char *type = fsm->passive ? \"listen\" : \"connection\";\n             if (error > 0) {\n                 VLOG_INFO(\"%s: %s attempt failed (%s)\",\n@@ -359,30 +369,38 @@ reconnect_disconnected(struct reconnect *fsm, long long int now, int error)\n         if (fsm->state & (S_ACTIVE | S_IDLE)) {\n             fsm->last_disconnected = now;\n         }\n+\n+        if (!reconnect_may_retry(fsm)) {\n+            reconnect_transition__(fsm, now, S_VOID);\n+            return;\n+        }\n+\n         /* Back off. */\n-        if (fsm->state & (S_ACTIVE | S_IDLE)\n-             && (fsm->last_activity - fsm->last_connected >= fsm->backoff\n-                 || fsm->passive)) {\n+        if (fsm->backoff_free_tries > 1) {\n+            fsm->backoff_free_tries--;\n+            fsm->backoff = 0;\n+        } else if (fsm->state & (S_ACTIVE | S_IDLE)\n+                   && (fsm->last_activity - fsm->last_connected >= fsm->backoff\n+                       || fsm->passive)) {\n             fsm->backoff = fsm->passive ? 0 : fsm->min_backoff;\n         } else {\n             if (fsm->backoff < fsm->min_backoff) {\n                 fsm->backoff = fsm->min_backoff;\n-            } else if (fsm->backoff >= fsm->max_backoff / 2) {\n-                fsm->backoff = fsm->max_backoff;\n-            } else {\n+            } else if (fsm->backoff < fsm->max_backoff / 2) {\n                 fsm->backoff *= 2;\n-            }\n-            if (fsm->passive) {\n-                VLOG(fsm->info, \"%s: waiting %.3g seconds before trying to \"\n-                          \"listen again\", fsm->name, fsm->backoff / 1000.0);\n+                VLOG(fsm->info, \"%s: waiting %.3g seconds before %s\",\n+                     fsm->name, fsm->backoff / 1000.0,\n+                     fsm->passive ? \"trying to listen again\" : \"reconnect\");\n             } else {\n-                VLOG(fsm->info, \"%s: waiting %.3g seconds before reconnect\",\n-                          fsm->name, fsm->backoff / 1000.0);\n+                if (fsm->backoff < fsm->max_backoff) {\n+                    VLOG_INFO(\"%s: continuing to %s in the background but \"\n+                              \"suppressing further logging\", fsm->name,\n+                              fsm->passive ? \"try to listen\" : \"reconnect\");\n+                }\n+                fsm->backoff = fsm->max_backoff;\n             }\n         }\n-\n-        reconnect_transition__(fsm, now,\n-                               reconnect_may_retry(fsm) ? S_BACKOFF : S_VOID);\n+        reconnect_transition__(fsm, now, S_BACKOFF);\n     }\n }\n \n@@ -397,7 +415,7 @@ reconnect_connecting(struct reconnect *fsm, long long int now)\n     if (fsm->state != S_CONNECTING) {\n         if (fsm->passive) {\n             VLOG(fsm->info, \"%s: listening...\", fsm->name);\n-        } else {\n+        } else if (fsm->backoff < fsm->max_backoff) {\n             VLOG(fsm->info, \"%s: connecting...\", fsm->name);\n         }\n         reconnect_transition__(fsm, now, S_CONNECTING);\ndiff --git a/lib/reconnect.h b/lib/reconnect.h\nindex 4446713ce873..9f2d469e2ddd 100644\n--- a/lib/reconnect.h\n+++ b/lib/reconnect.h\n@@ -51,6 +51,8 @@ int reconnect_get_probe_interval(const struct reconnect *);\n \n void reconnect_set_max_tries(struct reconnect *, unsigned int max_tries);\n unsigned int reconnect_get_max_tries(struct reconnect *);\n+void reconnect_set_backoff_free_tries(struct reconnect *,\n+                                      unsigned int backoff_free_tries);\n \n void reconnect_set_backoff(struct reconnect *,\n                            int min_backoff, int max_backoff);\n@@ -65,6 +67,7 @@ void reconnect_enable(struct reconnect *, long long int now);\n void reconnect_disable(struct reconnect *, long long int now);\n \n void reconnect_force_reconnect(struct reconnect *, long long int now);\n+void reconnect_skip_backoff(struct reconnect *);\n \n bool reconnect_is_connected(const struct reconnect *);\n unsigned int reconnect_get_last_connect_elapsed(const struct reconnect *,\ndiff --git a/python/ovs/reconnect.py b/python/ovs/reconnect.py\nindex ec52ebb7affc..34cc76987031 100644\n--- a/python/ovs/reconnect.py\n+++ b/python/ovs/reconnect.py\n@@ -154,6 +154,7 @@ class Reconnect(object):\n         self.last_connected = None\n         self.last_disconnected = None\n         self.max_tries = None\n+        self.backoff_free_tries = 0\n \n         self.creation_time = now\n         self.n_attempted_connections = 0\n@@ -242,6 +243,12 @@ class Reconnect(object):\n             self.backoff > self.max_backoff):\n                 self.backoff = self.max_backoff\n \n+    def set_backoff_free_tries(self, backoff_free_tries):\n+        \"\"\"Sets the number of connection attempts that will be made without\n+        backoff to 'backoff_free_tries'.  Values 0 and 1 both\n+        represent a single attempt.\"\"\"\n+        self.backoff_free_tries = backoff_free_tries\n+\n     def set_probe_interval(self, probe_interval):\n         \"\"\"Sets the \"probe interval\" to 'probe_interval', in milliseconds.  If\n         this is zero, it disables the connection keepalive feature.  If it is\n@@ -337,7 +344,7 @@ class Reconnect(object):\n                 else:\n                     self.info_level(\"%s: error listening for connections\"\n                                     % self.name)\n-            else:\n+            elif self.backoff < self.max_backoff:\n                 if self.passive:\n                     type_ = \"listen\"\n                 else:\n@@ -352,8 +359,15 @@ class Reconnect(object):\n             if (self.state in (Reconnect.Active, Reconnect.Idle)):\n                 self.last_disconnected = now\n \n+            if not self.__may_retry():\n+                self._transition(now, Reconnect.Void)\n+                return\n+\n             # Back off\n-            if (self.state in (Reconnect.Active, Reconnect.Idle) and\n+            if self.backoff_free_tries > 1:\n+                self.backoff_free_tries -= 1\n+                self.backoff = 0\n+            elif (self.state in (Reconnect.Active, Reconnect.Idle) and\n                 (self.last_activity - self.last_connected >= self.backoff or\n                  self.passive)):\n                 if self.passive:\n@@ -363,23 +377,26 @@ class Reconnect(object):\n             else:\n                 if self.backoff < self.min_backoff:\n                     self.backoff = self.min_backoff\n-                elif self.backoff >= self.max_backoff / 2:\n-                    self.backoff = self.max_backoff\n-                else:\n+                elif self.backoff < self.max_backoff / 2:\n                     self.backoff *= 2\n-\n-                if self.passive:\n-                    self.info_level(\"%s: waiting %.3g seconds before trying \"\n-                                    \"to listen again\"\n-                                    % (self.name, self.backoff / 1000.0))\n+                    if self.passive:\n+                        action = \"trying to listen again\"\n+                    else:\n+                        action = \"reconnect\"\n+                    self.info_level(\"%s: waiting %.3g seconds before %s\"\n+                                    % (self.name, self.backoff / 1000.0,\n+                                       action))\n                 else:\n-                    self.info_level(\"%s: waiting %.3g seconds before reconnect\"\n-                                    % (self.name, self.backoff / 1000.0))\n-\n-            if self.__may_retry():\n-                self._transition(now, Reconnect.Backoff)\n-            else:\n-                self._transition(now, Reconnect.Void)\n+                    if self.backoff < self.max_backoff:\n+                        if self.passive:\n+                            action = \"try to listen\"\n+                        else:\n+                            action = \"reconnect\"\n+                        self.info_level(\"%s: continuing to %s in the \"\n+                                        \"background but suppressing further \"\n+                                        \"logging\" % (self.name, action))\n+                    self.backoff = self.max_backoff\n+            self._transition(now, Reconnect.Backoff)\n \n     def connecting(self, now):\n         \"\"\"Tell this FSM that a connection or listening attempt is in progress.\n@@ -390,7 +407,7 @@ class Reconnect(object):\n         if self.state != Reconnect.ConnectInProgress:\n             if self.passive:\n                 self.info_level(\"%s: listening...\" % self.name)\n-            else:\n+            elif self.backoff < self.max_backoff:\n                 self.info_level(\"%s: connecting...\" % self.name)\n             self._transition(now, Reconnect.ConnectInProgress)\n \ndiff --git a/tests/reconnect.at b/tests/reconnect.at\nindex c88ca785cad2..59c95d95bdd3 100644\n--- a/tests/reconnect.at\n+++ b/tests/reconnect.at\n@@ -1037,6 +1037,60 @@ timeout\n ])\n \n ######################################################################\n+RECONNECT_CHECK([backoff-free tries work],\n+  [set-backoff-free-tries 2\n+enable\n+\n+# Connection fails quickly.\n+run\n+connect-failed ECONNREFUSED\n+\n+# No backoff.\n+run\n+timeout\n+\n+# Connection fails quickly again.\n+run\n+connect-failed ECONNREFUSED\n+\n+# Back off for 1000 ms.\n+run\n+timeout\n+],\n+   [### t=1000 ###\n+set-backoff-free-tries 2\n+enable\n+  in BACKOFF for 0 ms (0 ms backoff)\n+\n+# Connection fails quickly.\n+run\n+  should connect\n+connect-failed ECONNREFUSED\n+  0 successful connections out of 1 attempts, seqno 0\n+\n+# No backoff.\n+run\n+  should connect\n+timeout\n+  advance 0 ms\n+\n+# Connection fails quickly again.\n+run\n+  should connect\n+connect-failed ECONNREFUSED\n+  in BACKOFF for 0 ms (1000 ms backoff)\n+  0 successful connections out of 2 attempts, seqno 0\n+\n+# Back off for 1000 ms.\n+run\n+timeout\n+  advance 1000 ms\n+\n+### t=2000 ###\n+  in BACKOFF for 1000 ms (1000 ms backoff)\n+])\n+\n+######################################################################\n RECONNECT_CHECK([max-tries of 1 honored],\n   [set-max-tries 1\n enable\n@@ -1090,7 +1144,7 @@ timeout\n run\n   should disconnect\n disconnected\n-  in VOID for 0 ms (1000 ms backoff)\n+  in VOID for 0 ms (0 ms backoff)\n   1 successful connections out of 1 attempts, seqno 2\n   disconnected\n   disconnected at 11000 ms (0 ms ago)\ndiff --git a/tests/test-reconnect.c b/tests/test-reconnect.c\nindex 72252b8f707b..5a14e7fe58da 100644\n--- a/tests/test-reconnect.c\n+++ b/tests/test-reconnect.c\n@@ -208,6 +208,12 @@ do_set_max_tries(struct ovs_cmdl_context *ctx)\n }\n \n static void\n+do_set_backoff_free_tries(struct ovs_cmdl_context *ctx)\n+{\n+    reconnect_set_backoff_free_tries(reconnect, atoi(ctx->argv[1]));\n+}\n+\n+static void\n diff_stats(const struct reconnect_stats *old,\n            const struct reconnect_stats *new,\n            int delta)\n@@ -284,6 +290,8 @@ static const struct ovs_cmdl_command all_commands[] = {\n     { \"advance\", NULL, 1, 1, do_advance, OVS_RO },\n     { \"timeout\", NULL, 0, 0, do_timeout, OVS_RO },\n     { \"set-max-tries\", NULL, 1, 1, do_set_max_tries, OVS_RO },\n+    { \"set-backoff-free-tries\", NULL, 1, 1, do_set_backoff_free_tries,\n+      OVS_RO },\n     { \"passive\", NULL, 0, 0, do_set_passive, OVS_RO },\n     { \"listening\", NULL, 0, 0, do_listening, OVS_RO },\n     { \"listen-error\", NULL, 1, 1, do_listen_error, OVS_RO },\ndiff --git a/tests/test-reconnect.py b/tests/test-reconnect.py\nindex 8132fd9258ef..6cd052878eb1 100644\n--- a/tests/test-reconnect.py\n+++ b/tests/test-reconnect.py\n@@ -104,6 +104,10 @@ def do_set_max_tries(arg):\n     r.set_max_tries(int(arg))\n \n \n+def do_set_backoff_free_tries(arg):\n+    r.set_backoff_free_tries(int(arg))\n+\n+\n def diff_stats(old, new, delta):\n     if (old.state != new.state or\n         old.state_elapsed != new.state_elapsed or\n@@ -173,6 +177,7 @@ def main():\n         \"advance\": do_advance,\n         \"timeout\": do_timeout,\n         \"set-max-tries\": do_set_max_tries,\n+        \"set-backoff-free-tries\": do_set_backoff_free_tries,\n         \"passive\": do_set_passive,\n         \"listening\": do_listening,\n         \"listen-error\": do_listen_error\n","prefixes":["ovs-dev","RFC","41/52"]}