| Message ID | 20171204052947.7827-1-nusiddiq@redhat.com |
|---|---|
| State | Changes Requested |
| Headers | show |
| Series | [ovs-dev,v3] OVN pacemaker: Add the monitor action for Master role | expand |
On Mon, Dec 4, 2017 at 12:29 AM, <nusiddiq@redhat.com> wrote: > From: Numan Siddique <nusiddiq@redhat.com> > > Pacemaker Resource agent periodically calls the OVN OCF's "monitor" action > to check the status. But the OVN OCF script doesn't add the > action "monitor" for the role "Master" because of which the pacemaker > resource agent does not call the "monitor" action at all for the master. > In case OVN db servers exit for some reason this goes completely undetected > and one of the standby nodes is not promoted to master. > > This patch adds the monitor action for "Master" role. Also, the monitor > action currently does not check for the status of the ovn-northd (if manage_northd is yes). > This patch also checks for the status of the ovn-northd in the monitor action > for the "Master" role. If any of the ovsdb-server or ovn-northd is not running, > monitor action will return OCF_NOT_RUNNING and this will cause the pacemaker > to restart the OVN OCF resource. > > Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=1512568 > Signed-off-by: Numan Siddique <nusiddiq@redhat.com> > CC: Russell Bryant <russell@ovn.org> > --- > > v2 -> v3 > -------- > In ovsdb_server_demote, added a check to see the status of > ovn-northd if it is running as master. v2 was not working for > pacemaker OVN docker bundle resource. 
> > v1 -> v2 > ----- > Reverted the change to use 'ocf_attribute_target' as this function is > only availabe in pacemaker 1.1.16-12 > > ovn/utilities/ovndb-servers.ocf | 49 ++++++++++++++++++++++++++++++++++------- > 1 file changed, 41 insertions(+), 8 deletions(-) > > diff --git a/ovn/utilities/ovndb-servers.ocf b/ovn/utilities/ovndb-servers.ocf > index 3f3008700..389307a84 100755 > --- a/ovn/utilities/ovndb-servers.ocf > +++ b/ovn/utilities/ovndb-servers.ocf > @@ -120,7 +120,11 @@ ovsdb_server_metadata() { > <action name="stop" timeout="20s" /> > <action name="promote" timeout="50s" /> > <action name="demote" timeout="50s" /> > - <action name="monitor" timeout="20s" depth="0" interval="10s" /> > + <action name="monitor" timeout="20s" depth="0" interval="30s" /> Just making sure ... did you mean to leave this third "monitor" entry here? I don't really know how this works, but it looked like the next two would replace this one. > + <action name="monitor" timeout="20s" depth="0" interval="10s" > + role="Master" /> > + <action name="monitor" timeout="20s" depth="0" interval="30s" > + role="Slave"/> > <action name="meta-data" timeout="5s" /> > <action name="validate-all" timeout="20s" /> > </actions> > @@ -247,7 +251,7 @@ ovsdb_server_master_update() { > } > > ovsdb_server_monitor() { > - ovsdb_server_check_status > + ovsdb_server_check_status $@ > rc=$? > > ovsdb_server_master_update $rc > @@ -262,8 +266,21 @@ ovsdb_server_check_status() { > return $OCF_SUCCESS > fi > > + check_northd="no" > + if [ "$MANAGE_NORTHD" == "yes" ] && [ "$1" != "ignore_northd" ]; then > + check_northd="yes" > + fi > + > if [[ $sb_status == "running/active" && $nb_status == "running/active" ]]; then > - return $OCF_RUNNING_MASTER > + if [ "$check_northd" == "yes" ]; then > + # Verify if ovn-northd is running or not. > + ${OVN_CTL} status_northd | grep "ovn-northd is running" Is the grep needed? Can you just rely on the exit code of ovn-ctl? 
This script will fail if the output of ovn-ctl is changed in the future. > + if [ "$?" == "0" ] ; then > + return $OCF_RUNNING_MASTER > + fi > + else > + return $OCF_RUNNING_MASTER > + fi > fi > > # TODO: What about service running but not in either state above? > @@ -317,8 +334,13 @@ ovsdb_server_start() { > $@ start_ovsdb > > while [ 1 = 1 ]; do > - # It is important that we don't return until we're in a functional state > - ovsdb_server_monitor > + # It is important that we don't return until we're in a functional > + # state. When checking the status of the ovsdb-server's ignore northd. > + # It is possible that when the resource is restarted ovsdb-server's > + # can be started as masters and ovn-northd would not have been started. > + # ovn-northd will be started once a node is promoted to master and > + # 'manage_northd' is set to yes. > + ovsdb_server_monitor ignore_northd > rc=$? > case $rc in > $OCF_SUCCESS) return $rc;; > @@ -350,7 +372,7 @@ ovsdb_server_stop() { > ${OVN_CTL} --ovn-manage-ovsdb=no stop_northd > fi > > - ovsdb_server_check_status > + ovsdb_server_check_status ignore_northd > case $? in > $OCF_NOT_RUNNING) return ${OCF_SUCCESS};; > esac > @@ -360,7 +382,7 @@ ovsdb_server_stop() { > > while [ 1 = 1 ]; do > # It is important that we don't return until we're stopped > - ovsdb_server_check_status > + ovsdb_server_check_status ignore_northd > rc=$? > case $rc in > $OCF_SUCCESS) > @@ -381,7 +403,7 @@ ovsdb_server_stop() { > } > > ovsdb_server_promote() { > - ovsdb_server_check_status > + ovsdb_server_check_status ignore_northd > rc=$? 
> case $rc in > ${OCF_SUCCESS}) ;; > @@ -395,6 +417,11 @@ ovsdb_server_promote() { > ${OVN_CTL} promote_ovnnb > ${OVN_CTL} promote_ovnsb > > + if [ "$MANAGE_NORTHD" = "yes" ]; then > + # Startup ovn-northd service > + ${OVN_CTL} --ovn-manage-ovsdb=no start_northd > + fi > + > ocf_log debug "ovndb_servers: Promoting $host_name as the master" > # Record ourselves so that the agent has a better chance of doing > # the right thing at startup > @@ -404,6 +431,8 @@ ovsdb_server_promote() { > } > > ovsdb_server_demote() { > + # While demoting, check the status of ovn_northd. > + # In case ovn_northd is not running, we should return OCF_NOT_RUNNING. > ovsdb_server_check_status > if [ $? = $OCF_NOT_RUNNING ]; then > return $OCF_NOT_RUNNING > @@ -452,6 +481,10 @@ ovsdb_server_demote() { > ${OVN_CTL} demote_ovnsb --db-sb-sync-from-addr=${INVALID_IP_ADDRESS} > fi > > + if [ "$MANAGE_NORTHD" = "yes" ]; then > + # Stop ovn-northd service > + ${OVN_CTL} --ovn-manage-ovsdb=no stop_northd > + fi > ovsdb_server_master_update $OCF_SUCCESS > return $OCF_SUCCESS > } > -- > 2.14.3 >
On Mon, Dec 4, 2017 at 7:12 PM, Russell Bryant <russell@ovn.org> wrote: > On Mon, Dec 4, 2017 at 12:29 AM, <nusiddiq@redhat.com> wrote: > > From: Numan Siddique <nusiddiq@redhat.com> > > > > Pacemaker Resource agent periodically calls the OVN OCF's "monitor" > action > > periodically to check the status. But the OVN OCF script doesn't add the > > action "monitor" for the role "Master" because of which the pacemaker > > resource agent do not call the "monitor" action at all for the master. > > In case OVN db servers exit for some reason this totally gets undetected > > and one of the standby node is not promoted to master. > > > > This patch adds the monitor action for "Master" role. Also the monitor > > action do not check for the status of the ovn-northd (if manage_northd > is yes). > > This patch also checks for the status of the ovn-northd in the monitor > action > > for the "Master" role. If any of the ovsdb-server or ovn-northd is not > running, > > monitor action will return OCF_NOT_RUNNING and this will cause the > pacemaker > > to restart the OVN OCF resource. > > > > Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=1512568 > > Signed-off-by: Numan Siddique <nusiddiq@redhat.com> > > CC: Russel Bryant <russell@ovn.org> > > --- > > > > v2 -> v3 > > -------- > > In the ovsdb_server_demote added the check to see the status of > > ovn-northd if it is running as master. v2 was not working for > > pacemaker OVN docker bundle resource. 
> > > > v1 -> v2 > > ----- > > Reverted the change to use 'ocf_attribute_target' as this function is > > only availabe in pacemaker 1.1.16-12 > > > > ovn/utilities/ovndb-servers.ocf | 49 ++++++++++++++++++++++++++++++ > ++++------- > > 1 file changed, 41 insertions(+), 8 deletions(-) > > > > diff --git a/ovn/utilities/ovndb-servers.ocf > b/ovn/utilities/ovndb-servers.ocf > > index 3f3008700..389307a84 100755 > > --- a/ovn/utilities/ovndb-servers.ocf > > +++ b/ovn/utilities/ovndb-servers.ocf > > @@ -120,7 +120,11 @@ ovsdb_server_metadata() { > > <action name="stop" timeout="20s" /> > > <action name="promote" timeout="50s" /> > > <action name="demote" timeout="50s" /> > > - <action name="monitor" timeout="20s" depth="0" interval="10s" > /> > > + <action name="monitor" timeout="20s" depth="0" interval="30s" > /> > > Just making sure ... did you mean to leave this third "monitor" entry > here? I don't really know how this works, but it looked like the next > two would replace this one. > I referred to galera resource agent as an example [1] and it had 3 monitor actions. So thought of keeping the same way. I will test it out and remove it if it is not required. [1] - https://github.com/ClusterLabs/resource-agents/blob/master/heartbeat/galera#L256 > > > + <action name="monitor" timeout="20s" depth="0" interval="10s" > > + role="Master" /> > > + <action name="monitor" timeout="20s" depth="0" interval="30s" > > + role="Slave"/> > > <action name="meta-data" timeout="5s" /> > > <action name="validate-all" timeout="20s" /> > > </actions> > > @@ -247,7 +251,7 @@ ovsdb_server_master_update() { > > } > > > > ovsdb_server_monitor() { > > - ovsdb_server_check_status > > + ovsdb_server_check_status $@ > > rc=$? 
> > > > ovsdb_server_master_update $rc > > @@ -262,8 +266,21 @@ ovsdb_server_check_status() { > > return $OCF_SUCCESS > > fi > > > > + check_northd="no" > > + if [ "$MANAGE_NORTHD" == "yes" ] && [ "$1" != "ignore_northd" ]; > then > > + check_northd="yes" > > + fi > > + > > if [[ $sb_status == "running/active" && $nb_status == > "running/active" ]]; then > > - return $OCF_RUNNING_MASTER > > + if [ "$check_northd" == "yes" ]; then > > + # Verify if ovn-northd is running or not. > > + ${OVN_CTL} status_northd | grep "ovn-northd is running" > > Is the grep needed? Can you just rely on the exit code of ovn-ctl? > This script will fail if the output of ovn-ctl is changed in the > future. > I thought I would be explicit. But I agree with you. Thanks for pointing out. I will submit v4 soon. > > + if [ "$?" == "0" ] ; then > > + return $OCF_RUNNING_MASTER > > + fi > > + else > > + return $OCF_RUNNING_MASTER > > + fi > > fi > > > > # TODO: What about service running but not in either state above? > > @@ -317,8 +334,13 @@ ovsdb_server_start() { > > $@ start_ovsdb > > > > while [ 1 = 1 ]; do > > - # It is important that we don't return until we're in a > functional state > > - ovsdb_server_monitor > > + # It is important that we don't return until we're in a > functional > > + # state. When checking the status of the ovsdb-server's ignore > northd. > > + # It is possible that when the resource is restarted > ovsdb-server's > > + # can be started as masters and ovn-northd would not have been > started. > > + # ovn-northd will be started once a node is promoted to master > and > > + # 'manage_northd' is set to yes. > > + ovsdb_server_monitor ignore_northd > > rc=$? > > case $rc in > > $OCF_SUCCESS) return $rc;; > > @@ -350,7 +372,7 @@ ovsdb_server_stop() { > > ${OVN_CTL} --ovn-manage-ovsdb=no stop_northd > > fi > > > > - ovsdb_server_check_status > > + ovsdb_server_check_status ignore_northd > > case $? 
in > > $OCF_NOT_RUNNING) return ${OCF_SUCCESS};; > > esac > > @@ -360,7 +382,7 @@ ovsdb_server_stop() { > > > > while [ 1 = 1 ]; do > > # It is important that we don't return until we're stopped > > - ovsdb_server_check_status > > + ovsdb_server_check_status ignore_northd > > rc=$? > > case $rc in > > $OCF_SUCCESS) > > @@ -381,7 +403,7 @@ ovsdb_server_stop() { > > } > > > > ovsdb_server_promote() { > > - ovsdb_server_check_status > > + ovsdb_server_check_status ignore_northd > > rc=$? > > case $rc in > > ${OCF_SUCCESS}) ;; > > @@ -395,6 +417,11 @@ ovsdb_server_promote() { > > ${OVN_CTL} promote_ovnnb > > ${OVN_CTL} promote_ovnsb > > > > + if [ "$MANAGE_NORTHD" = "yes" ]; then > > + # Startup ovn-northd service > > + ${OVN_CTL} --ovn-manage-ovsdb=no start_northd > > + fi > > + > > ocf_log debug "ovndb_servers: Promoting $host_name as the master" > > # Record ourselves so that the agent has a better chance of doing > > # the right thing at startup > > @@ -404,6 +431,8 @@ ovsdb_server_promote() { > > } > > > > ovsdb_server_demote() { > > + # While demoting, check the status of ovn_northd. > > + # In case ovn_northd is not running, we should return > OCF_NOT_RUNNING. > > ovsdb_server_check_status > > if [ $? = $OCF_NOT_RUNNING ]; then > > return $OCF_NOT_RUNNING > > @@ -452,6 +481,10 @@ ovsdb_server_demote() { > > ${OVN_CTL} demote_ovnsb --db-sb-sync-from-addr=${ > INVALID_IP_ADDRESS} > > fi > > > > + if [ "$MANAGE_NORTHD" = "yes" ]; then > > + # Stop ovn-northd service > > + ${OVN_CTL} --ovn-manage-ovsdb=no stop_northd > > + fi > > ovsdb_server_master_update $OCF_SUCCESS > > return $OCF_SUCCESS > > } > > -- > > 2.14.3 > > > > > > -- > Russell Bryant >
On Mon, Dec 4, 2017 at 7:39 PM, Numan Siddique <nusiddiq@redhat.com> wrote: > > > On Mon, Dec 4, 2017 at 7:12 PM, Russell Bryant <russell@ovn.org> wrote: > >> On Mon, Dec 4, 2017 at 12:29 AM, <nusiddiq@redhat.com> wrote: >> > From: Numan Siddique <nusiddiq@redhat.com> >> > >> > Pacemaker Resource agent periodically calls the OVN OCF's "monitor" >> action >> > periodically to check the status. But the OVN OCF script doesn't add the >> > action "monitor" for the role "Master" because of which the pacemaker >> > resource agent do not call the "monitor" action at all for the master. >> > In case OVN db servers exit for some reason this totally gets undetected >> > and one of the standby node is not promoted to master. >> > >> > This patch adds the monitor action for "Master" role. Also the monitor >> > action do not check for the status of the ovn-northd (if manage_northd >> is yes). >> > This patch also checks for the status of the ovn-northd in the monitor >> action >> > for the "Master" role. If any of the ovsdb-server or ovn-northd is not >> running, >> > monitor action will return OCF_NOT_RUNNING and this will cause the >> pacemaker >> > to restart the OVN OCF resource. >> > >> > Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=1512568 >> > Signed-off-by: Numan Siddique <nusiddiq@redhat.com> >> > CC: Russel Bryant <russell@ovn.org> >> > --- >> > >> > v2 -> v3 >> > -------- >> > In the ovsdb_server_demote added the check to see the status of >> > ovn-northd if it is running as master. v2 was not working for >> > pacemaker OVN docker bundle resource. 
>> > >> > v1 -> v2 >> > ----- >> > Reverted the change to use 'ocf_attribute_target' as this function is >> > only available in pacemaker 1.1.16-12 >> > >> > ovn/utilities/ovndb-servers.ocf | 49 ++++++++++++++++++++++++++++++ >> ++++------- >> > 1 file changed, 41 insertions(+), 8 deletions(-) >> > >> > diff --git a/ovn/utilities/ovndb-servers.ocf >> b/ovn/utilities/ovndb-servers.ocf >> > index 3f3008700..389307a84 100755 >> > --- a/ovn/utilities/ovndb-servers.ocf >> > +++ b/ovn/utilities/ovndb-servers.ocf >> > @@ -120,7 +120,11 @@ ovsdb_server_metadata() { >> > <action name="stop" timeout="20s" /> >> > <action name="promote" timeout="50s" /> >> > <action name="demote" timeout="50s" /> >> > - <action name="monitor" timeout="20s" depth="0" >> interval="10s" /> >> > + <action name="monitor" timeout="20s" depth="0" >> interval="30s" /> >> >> Just making sure ... did you mean to leave this third "monitor" entry >> here? I don't really know how this works, but it looked like the next >> two would replace this one. >> > > I referred to galera resource agent as an example [1] and it had 3 monitor > actions. So I thought of keeping it the same way. > I will test it out and remove it if it is not required. > > [1] - https://github.com/ClusterLabs/resource-agents/blob/master/heartbeat/galera#L256 > > I tested it and it works fine without the extra monitor action. Also the redis RA script doesn't have the extra monitor action (without any role). Removed it in v4. Thanks, Russell, for the review. > > >> >> > + <action name="monitor" timeout="20s" depth="0" interval="10s" >> > + role="Master" /> >> > + <action name="monitor" timeout="20s" depth="0" interval="30s" >> > + role="Slave"/> >> > <action name="meta-data" timeout="5s" /> >> > <action name="validate-all" timeout="20s" /> >> > </actions> >> > @@ -247,7 +251,7 @@ ovsdb_server_master_update() { >> > } >> > >> > ovsdb_server_monitor() { >> > - ovsdb_server_check_status >> > + ovsdb_server_check_status $@ >> > rc=$? 
>> > >> > ovsdb_server_master_update $rc >> > @@ -262,8 +266,21 @@ ovsdb_server_check_status() { >> > return $OCF_SUCCESS >> > fi >> > >> > + check_northd="no" >> > + if [ "$MANAGE_NORTHD" == "yes" ] && [ "$1" != "ignore_northd" ]; >> then >> > + check_northd="yes" >> > + fi >> > + >> > if [[ $sb_status == "running/active" && $nb_status == >> "running/active" ]]; then >> > - return $OCF_RUNNING_MASTER >> > + if [ "$check_northd" == "yes" ]; then >> > + # Verify if ovn-northd is running or not. >> > + ${OVN_CTL} status_northd | grep "ovn-northd is running" >> >> Is the grep needed? Can you just rely on the exit code of ovn-ctl? >> This script will fail if the output of ovn-ctl is changed in the >> future. >> > > I thought I would be explicit. But I agree with you. Thanks for pointing > out. I will submit v4 soon. > > >> > + if [ "$?" == "0" ] ; then >> > + return $OCF_RUNNING_MASTER >> > + fi >> > + else >> > + return $OCF_RUNNING_MASTER >> > + fi >> > fi >> > >> > # TODO: What about service running but not in either state above? >> > @@ -317,8 +334,13 @@ ovsdb_server_start() { >> > $@ start_ovsdb >> > >> > while [ 1 = 1 ]; do >> > - # It is important that we don't return until we're in a >> functional state >> > - ovsdb_server_monitor >> > + # It is important that we don't return until we're in a >> functional >> > + # state. When checking the status of the ovsdb-server's ignore >> northd. >> > + # It is possible that when the resource is restarted >> ovsdb-server's >> > + # can be started as masters and ovn-northd would not have been >> started. >> > + # ovn-northd will be started once a node is promoted to master >> and >> > + # 'manage_northd' is set to yes. >> > + ovsdb_server_monitor ignore_northd >> > rc=$? >> > case $rc in >> > $OCF_SUCCESS) return $rc;; >> > @@ -350,7 +372,7 @@ ovsdb_server_stop() { >> > ${OVN_CTL} --ovn-manage-ovsdb=no stop_northd >> > fi >> > >> > - ovsdb_server_check_status >> > + ovsdb_server_check_status ignore_northd >> > case $? 
in >> > $OCF_NOT_RUNNING) return ${OCF_SUCCESS};; >> > esac >> > @@ -360,7 +382,7 @@ ovsdb_server_stop() { >> > >> > while [ 1 = 1 ]; do >> > # It is important that we don't return until we're stopped >> > - ovsdb_server_check_status >> > + ovsdb_server_check_status ignore_northd >> > rc=$? >> > case $rc in >> > $OCF_SUCCESS) >> > @@ -381,7 +403,7 @@ ovsdb_server_stop() { >> > } >> > >> > ovsdb_server_promote() { >> > - ovsdb_server_check_status >> > + ovsdb_server_check_status ignore_northd >> > rc=$? >> > case $rc in >> > ${OCF_SUCCESS}) ;; >> > @@ -395,6 +417,11 @@ ovsdb_server_promote() { >> > ${OVN_CTL} promote_ovnnb >> > ${OVN_CTL} promote_ovnsb >> > >> > + if [ "$MANAGE_NORTHD" = "yes" ]; then >> > + # Startup ovn-northd service >> > + ${OVN_CTL} --ovn-manage-ovsdb=no start_northd >> > + fi >> > + >> > ocf_log debug "ovndb_servers: Promoting $host_name as the master" >> > # Record ourselves so that the agent has a better chance of doing >> > # the right thing at startup >> > @@ -404,6 +431,8 @@ ovsdb_server_promote() { >> > } >> > >> > ovsdb_server_demote() { >> > + # While demoting, check the status of ovn_northd. >> > + # In case ovn_northd is not running, we should return >> OCF_NOT_RUNNING. >> > ovsdb_server_check_status >> > if [ $? = $OCF_NOT_RUNNING ]; then >> > return $OCF_NOT_RUNNING >> > @@ -452,6 +481,10 @@ ovsdb_server_demote() { >> > ${OVN_CTL} demote_ovnsb --db-sb-sync-from-addr=${INVALID_IP_ADDRESS} >> > fi >> > >> > + if [ "$MANAGE_NORTHD" = "yes" ]; then >> > + # Stop ovn-northd service >> > + ${OVN_CTL} --ovn-manage-ovsdb=no stop_northd >> > + fi >> > ovsdb_server_master_update $OCF_SUCCESS >> > return $OCF_SUCCESS >> > } >> > -- >> > 2.14.3 >> > >> >> >> >> -- >> Russell Bryant >> > >
diff --git a/ovn/utilities/ovndb-servers.ocf b/ovn/utilities/ovndb-servers.ocf index 3f3008700..389307a84 100755 --- a/ovn/utilities/ovndb-servers.ocf +++ b/ovn/utilities/ovndb-servers.ocf @@ -120,7 +120,11 @@ ovsdb_server_metadata() { <action name="stop" timeout="20s" /> <action name="promote" timeout="50s" /> <action name="demote" timeout="50s" /> - <action name="monitor" timeout="20s" depth="0" interval="10s" /> + <action name="monitor" timeout="20s" depth="0" interval="30s" /> + <action name="monitor" timeout="20s" depth="0" interval="10s" + role="Master" /> + <action name="monitor" timeout="20s" depth="0" interval="30s" + role="Slave"/> <action name="meta-data" timeout="5s" /> <action name="validate-all" timeout="20s" /> </actions> @@ -247,7 +251,7 @@ ovsdb_server_master_update() { } ovsdb_server_monitor() { - ovsdb_server_check_status + ovsdb_server_check_status $@ rc=$? ovsdb_server_master_update $rc @@ -262,8 +266,21 @@ ovsdb_server_check_status() { return $OCF_SUCCESS fi + check_northd="no" + if [ "$MANAGE_NORTHD" == "yes" ] && [ "$1" != "ignore_northd" ]; then + check_northd="yes" + fi + if [[ $sb_status == "running/active" && $nb_status == "running/active" ]]; then - return $OCF_RUNNING_MASTER + if [ "$check_northd" == "yes" ]; then + # Verify if ovn-northd is running or not. + ${OVN_CTL} status_northd | grep "ovn-northd is running" + if [ "$?" == "0" ] ; then + return $OCF_RUNNING_MASTER + fi + else + return $OCF_RUNNING_MASTER + fi fi # TODO: What about service running but not in either state above? @@ -317,8 +334,13 @@ ovsdb_server_start() { $@ start_ovsdb while [ 1 = 1 ]; do - # It is important that we don't return until we're in a functional state - ovsdb_server_monitor + # It is important that we don't return until we're in a functional + # state. When checking the status of the ovsdb-server's ignore northd. 
+ # It is possible that when the resource is restarted ovsdb-server's + # can be started as masters and ovn-northd would not have been started. + # ovn-northd will be started once a node is promoted to master and + # 'manage_northd' is set to yes. + ovsdb_server_monitor ignore_northd rc=$? case $rc in $OCF_SUCCESS) return $rc;; @@ -350,7 +372,7 @@ ovsdb_server_stop() { ${OVN_CTL} --ovn-manage-ovsdb=no stop_northd fi - ovsdb_server_check_status + ovsdb_server_check_status ignore_northd case $? in $OCF_NOT_RUNNING) return ${OCF_SUCCESS};; esac @@ -360,7 +382,7 @@ ovsdb_server_stop() { while [ 1 = 1 ]; do # It is important that we don't return until we're stopped - ovsdb_server_check_status + ovsdb_server_check_status ignore_northd rc=$? case $rc in $OCF_SUCCESS) @@ -381,7 +403,7 @@ ovsdb_server_stop() { } ovsdb_server_promote() { - ovsdb_server_check_status + ovsdb_server_check_status ignore_northd rc=$? case $rc in ${OCF_SUCCESS}) ;; @@ -395,6 +417,11 @@ ovsdb_server_promote() { ${OVN_CTL} promote_ovnnb ${OVN_CTL} promote_ovnsb + if [ "$MANAGE_NORTHD" = "yes" ]; then + # Startup ovn-northd service + ${OVN_CTL} --ovn-manage-ovsdb=no start_northd + fi + ocf_log debug "ovndb_servers: Promoting $host_name as the master" # Record ourselves so that the agent has a better chance of doing # the right thing at startup @@ -404,6 +431,8 @@ ovsdb_server_promote() { } ovsdb_server_demote() { + # While demoting, check the status of ovn_northd. + # In case ovn_northd is not running, we should return OCF_NOT_RUNNING. ovsdb_server_check_status if [ $? = $OCF_NOT_RUNNING ]; then return $OCF_NOT_RUNNING @@ -452,6 +481,10 @@ ovsdb_server_demote() { ${OVN_CTL} demote_ovnsb --db-sb-sync-from-addr=${INVALID_IP_ADDRESS} fi + if [ "$MANAGE_NORTHD" = "yes" ]; then + # Stop ovn-northd service + ${OVN_CTL} --ovn-manage-ovsdb=no stop_northd + fi ovsdb_server_master_update $OCF_SUCCESS return $OCF_SUCCESS }