[LEDE-DEV] procd: Restore respawn on SIGTERM timeout

Message ID 20171019130215.31354-1-kristian.evensen@gmail.com
State New
Headers show
Series
  • [LEDE-DEV] procd: Restore respawn on SIGTERM timeout
Related show

Commit Message

Kristian Evensen Oct. 19, 2017, 1:02 p.m.
When SIGTERM times out, procd sends SIGKILL and then restarts the
process once SIGCHLD has been received. This all works fine, with one
exception: respawn is not restored when instance_start() is called from
instance_exit(). The reason is that instance_stop() always sets respawn
to false, and the same service_instance struct is then reused for the
instance_start() call.

The consequence is that if the process is killed or crashes again, it
will not respawn. Solve this by adding a field, respawn_org, that stores
the original value of respawn in instance_stop(), and by restoring
respawn from it in instance_exit() before instance_start() is called.

Signed-off-by: Kristian Evensen <kristian.evensen@gmail.com>
---
 service/instance.c | 6 ++++--
 service/instance.h | 1 +
 2 files changed, 5 insertions(+), 2 deletions(-)

Comments

Karl Palsson Oct. 19, 2017, 4:24 p.m. | #1
Kristian Evensen <kristian.evensen@gmail.com> wrote:
> When SIGTERM times out, procd sends SIGKILL and then restarts
> the process once SIGCHLD has been received. This all works
> fine, with one exception - respawn is not restored when
> instance_start() is called from instance_exit(). The reason is
> that respawn is always set to false in instance_stop(), and the
> same service_instance struct is used for the
> instance_start()-call.
> 
> The consequence is that if the process is killed/crashes again,
> it will not respawn. Solve this issue by adding a variable used
> to store the original value of respawn in instance_stop(), and
> then restore the original respawn-value in instance_exit().

It smells like this likely applies to many other fields. Is there
a path here that's not using the copy/compare routines for a
service/instance? Should they be? Does your path even restore all
the parameters of respawn?

Cheers,
Karl P


> 
> Signed-off-by: Kristian Evensen <kristian.evensen@gmail.com>
> ---
>  service/instance.c | 6 ++++--
>  service/instance.h | 1 +
>  2 files changed, 5 insertions(+), 2 deletions(-)
> 
> diff --git a/service/instance.c b/service/instance.c
> index b7cb523..76c74ed 100644
> --- a/service/instance.c
> +++ b/service/instance.c
> @@ -532,9 +532,10 @@ instance_exit(struct uloop_process *p, int ret)
>  
>  	if (in->halt) {
>  		instance_removepid(in);
> -		if (in->restart)
> +		if (in->restart) {
> +			in->respawn = in->respawn_org;
>  			instance_start(in);
> -		else {
> +		} else {
>  			struct service *s = in->srv;
>  
>  			avl_delete(&s->instances.avl, &in->node.avl);
> @@ -567,6 +568,7 @@ instance_stop(struct service_instance *in, bool halt)
>  	if (!in->proc.pending)
>  		return;
>  	in->halt = halt;
> +	in->respawn_org = in->respawn;
>  	in->restart = in->respawn = false;
>  	kill(in->proc.pid, SIGTERM);
>  	uloop_timeout_set(&in->timeout, in->term_timeout * 1000);
> diff --git a/service/instance.h b/service/instance.h
> index bdd14de..a0ac302 100644
> --- a/service/instance.h
> +++ b/service/instance.h
> @@ -48,6 +48,7 @@ struct service_instance {
>  	bool halt;
>  	bool restart;
>  	bool respawn;
> +	bool respawn_org;
>  	int respawn_count;
>  	int reload_signal;
>  	struct timespec start;
> -- 
> 2.11.0
> 
> 
> _______________________________________________
> Lede-dev mailing list
> Lede-dev@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/lede-dev

Patch

diff --git a/service/instance.c b/service/instance.c
index b7cb523..76c74ed 100644
--- a/service/instance.c
+++ b/service/instance.c
@@ -532,9 +532,10 @@  instance_exit(struct uloop_process *p, int ret)
 
 	if (in->halt) {
 		instance_removepid(in);
-		if (in->restart)
+		if (in->restart) {
+			in->respawn = in->respawn_org;
 			instance_start(in);
-		else {
+		} else {
 			struct service *s = in->srv;
 
 			avl_delete(&s->instances.avl, &in->node.avl);
@@ -567,6 +568,7 @@  instance_stop(struct service_instance *in, bool halt)
 	if (!in->proc.pending)
 		return;
 	in->halt = halt;
+	in->respawn_org = in->respawn;
 	in->restart = in->respawn = false;
 	kill(in->proc.pid, SIGTERM);
 	uloop_timeout_set(&in->timeout, in->term_timeout * 1000);
diff --git a/service/instance.h b/service/instance.h
index bdd14de..a0ac302 100644
--- a/service/instance.h
+++ b/service/instance.h
@@ -48,6 +48,7 @@  struct service_instance {
 	bool halt;
 	bool restart;
 	bool respawn;
+	bool respawn_org;
 	int respawn_count;
 	int reload_signal;
 	struct timespec start;