diff mbox

[OpenWrt-Devel] ubus/libubox: SIGTERM/SIGINT signals received during ubus_complete_request() calls are ignored

Message ID a2381f11-cba1-b64d-f34e-72858f9169c6@nbd.name
State Changes Requested
Delegated to: John Crispin
Headers show

Commit Message

Felix Fietkau Feb. 3, 2017, 1:47 p.m. UTC
Hi Alin,

On 2017-02-03 09:29, Alin Năstac wrote:
> Hi Felix,
> 
> SIGTERM & SIGINT signals received during ubus_complete_request()
> waiting for ubus_poll_data() to return are ignored due to
> uloop_cancelled being restored to its previous value it had before
> uloop_poll_data() was called.
> 
> The reproduction scenario is this:
>   1) cancelled local variable is set to false, current value of uloop_cancelled
>   2) while ubus_poll_data() is waiting for a read event, a SIGTERM is
> received, so uloop_cancelled is set to true
>   3) after ubus_poll_data() returns, uloop_cancelled value gets
> overwritten with false and SIGTERM signal ends up being ignored in the
> uloop_run()
> 
> The whole uloop_cancelled related logic in the ubus_complete_request()
> seems to be focused on getting out from the current recursion of
> uloop_run(), but from what I see uloop_run() capability to be called
> recursively is no longer used in ubus, so I wonder if it is still
> necessary.
I left in uloop_cancelled primarily to deal with cleaning up recursive
requests after Ctrl+c or SIGTERM when uloop is in use.

> If the answer is no, the entire logic referring to uloop_cancelled and
> uloop_end() should be removed from libubus-req.c. Otherwise an
> additional uloop_cancelled_recursions bit mask would be needed that
> will allow to control uloop_run() exit condition for each individual
> recursion.
I think the main problem was the fact that uloop_cancelled was used
both for internal request termination and also for external use.
Here's a patch that should resolve this issue, please test:

---

Comments

Alin Năstac Feb. 3, 2017, 2:57 p.m. UTC | #1
Hi Felix,

The SIGTERM ignore issue I was experiencing before is no longer
reproducible after I apply your patch.

However, I'm concerned about a possible ignore of SIGTERM signal
received during a ubus_complete_request() call. If ctx->stack_depth is
0, any such signal received between prev_uloop_initialization and the
reset of ulopp_cancelling to false will be ignored. Is this
"uloop_cancelling = false" really necessary?

BTW, I think the reset of uloop_status and uloop_cancelled should be
executed before uloop_setup_signals() like so:
        if (!recursive_calls++) {
               uloop_status = 0;
               uloop_cancelled = false;
               uloop_setup_signals(true);
        }

Cheers,
Alin

On Fri, Feb 3, 2017 at 2:47 PM, Felix Fietkau <nbd@nbd.name> wrote:
> Hi Alin,
>
> On 2017-02-03 09:29, Alin Năstac wrote:
>> Hi Felix,
>>
>> SIGTERM & SIGINT signals received during ubus_complete_request()
>> waiting for ubus_poll_data() to return are ignored due to
>> uloop_cancelled being restored to its previous value it had before
>> uloop_poll_data() was called.
>>
>> The reproduction scenario is this:
>>   1) cancelled local variable is set to false, current value of uloop_cancelled
>>   2) while ubus_poll_data() is waiting for a read event, a SIGTERM is
>> received, so uloop_cancelled is set to true
>>   3) after ubus_poll_data() returns, uloop_cancelled value gets
>> overwritten with false and SIGTERM signal ends up being ignored in the
>> uloop_run()
>>
>> The whole uloop_cancelled related logic in the ubus_complete_request()
>> seems to be focused on getting out from the current recursion of
>> uloop_run(), but from what I see uloop_run() capability to be called
>> recursively is no longer used in ubus, so I wonder if it is still
>> necessary.
> I left in uloop_cancelled primarily to deal with cleaning up recursive
> requests after Ctrl+c or SIGTERM when uloop is in use.
>
>> If the answer is no, the entire logic referring to uloop_cancelled and
>> uloop_end() should be removed from libubus-req.c. Otherwise an
>> additional uloop_cancelled_recursions bit mask would be needed that
>> will allow to control uloop_run() exit condition for each individual
>> recursion.
> I think the main problem was the fact that uloop_cancelled was used
> both for internal request termination and also for external use.
> Here's a patch that should resolve this issue, please test:
>
> ---
> diff --git a/libubus-io.c b/libubus-io.c
> index 1075c65..1343bb2 100644
> --- a/libubus-io.c
> +++ b/libubus-io.c
> @@ -154,9 +154,10 @@ int __hidden ubus_send_msg(struct ubus_context *ctx, uint32_t seq,
>         return ret;
>  }
>
> -static int recv_retry(int fd, struct iovec *iov, bool wait, int *recv_fd)
> +static int recv_retry(struct ubus_context *ctx, struct iovec *iov, bool wait, int *recv_fd)
>  {
>         int bytes, total = 0;
> +       int fd = ctx->sock.fd;
>         static struct {
>                 struct cmsghdr h;
>                 int fd;
> @@ -191,7 +192,7 @@ static int recv_retry(int fd, struct iovec *iov, bool wait, int *recv_fd)
>
>                 if (bytes < 0) {
>                         bytes = 0;
> -                       if (uloop_cancelled)
> +                       if (uloop_cancelled || ctx->cancel_poll)
>                                 return 0;
>                         if (errno == EINTR)
>                                 continue;
> @@ -274,7 +275,7 @@ static bool get_next_msg(struct ubus_context *ctx, int *recv_fd)
>         int r;
>
>         /* receive header + start attribute */
> -       r = recv_retry(ctx->sock.fd, &iov, false, recv_fd);
> +       r = recv_retry(ctx, &iov, false, recv_fd);
>         if (r <= 0) {
>                 if (r < 0)
>                         ctx->sock.eof = true;
> @@ -298,7 +299,7 @@ static bool get_next_msg(struct ubus_context *ctx, int *recv_fd)
>         iov.iov_base = (char *)ctx->msgbuf.data + sizeof(hdrbuf.data);
>         iov.iov_len = blob_len(ctx->msgbuf.data);
>         if (iov.iov_len > 0 &&
> -           recv_retry(ctx->sock.fd, &iov, true, NULL) <= 0)
> +           recv_retry(ctx, &iov, true, NULL) <= 0)
>                 return false;
>
>         return true;
> @@ -311,7 +312,7 @@ void __hidden ubus_handle_data(struct uloop_fd *u, unsigned int events)
>
>         while (get_next_msg(ctx, &recv_fd)) {
>                 ubus_process_msg(ctx, &ctx->msgbuf, recv_fd);
> -               if (uloop_cancelled)
> +               if (uloop_cancelled || ctx->cancel_poll)
>                         break;
>         }
>
> @@ -326,6 +327,7 @@ void __hidden ubus_poll_data(struct ubus_context *ctx, int timeout)
>                 .events = POLLIN | POLLERR,
>         };
>
> +       ctx->cancel_poll = false;
>         poll(&pfd, 1, timeout ? timeout : -1);
>         ubus_handle_data(&ctx->sock, ULOOP_READ);
>  }
> diff --git a/libubus-req.c b/libubus-req.c
> index db5061c..305e813 100644
> --- a/libubus-req.c
> +++ b/libubus-req.c
> @@ -122,7 +122,7 @@ static void ubus_sync_req_cb(struct ubus_request *req, int ret)
>  {
>         req->status_msg = true;
>         req->status_code = ret;
> -       uloop_end();
> +       req->ctx->cancel_poll = true;
>  }
>
>  static int64_t get_time_msec(void)
> @@ -142,6 +142,7 @@ int ubus_complete_request(struct ubus_context *ctx, struct ubus_request *req,
>         ubus_complete_handler_t complete_cb = req->complete_cb;
>         int status = UBUS_STATUS_NO_DATA;
>         int64_t timeout = 0, time_end = 0;
> +       bool prev_uloop_cancelled = uloop_cancelled;
>
>         if (req_timeout)
>                 time_end = get_time_msec() + req_timeout;
> @@ -149,29 +150,32 @@ int ubus_complete_request(struct ubus_context *ctx, struct ubus_request *req,
>         ubus_complete_request_async(ctx, req);
>         req->complete_cb = ubus_sync_req_cb;
>
> +       if (!ctx->stack_depth)
> +               uloop_cancelled = false;
> +
>         ctx->stack_depth++;
>         while (!req->status_msg) {
> -               bool cancelled = uloop_cancelled;
> -
> -               uloop_cancelled = false;
>                 if (req_timeout) {
>                         timeout = time_end - get_time_msec();
>                         if (timeout <= 0) {
>                                 ubus_set_req_status(req, UBUS_STATUS_TIMEOUT);
> -                               uloop_cancelled = cancelled;
>                                 break;
>                         }
>                 }
> +
>                 ubus_poll_data(ctx, (unsigned int) timeout);
>
> -               uloop_cancelled = cancelled;
>                 if (ctx->sock.eof) {
>                         ubus_set_req_status(req, UBUS_STATUS_CONNECTION_FAILED);
> +                       ctx->cancel_poll = true;
>                         break;
>                 }
>         }
> +
>         ctx->stack_depth--;
>         if (ctx->stack_depth)
> +               ctx->cancel_poll = true;
> +       else if (prev_uloop_cancelled)
>                 uloop_cancelled = true;
>
>         if (req->status_msg)
> diff --git a/libubus.h b/libubus.h
> index ea53272..4e45cb6 100644
> --- a/libubus.h
> +++ b/libubus.h
> @@ -155,6 +155,7 @@ struct ubus_context {
>
>         uint32_t local_id;
>         uint16_t request_seq;
> +       bool cancel_poll;
>         int stack_depth;
>
>         void (*connection_lost)(struct ubus_context *ctx);
>
Felix Fietkau Feb. 3, 2017, 3:19 p.m. UTC | #2
On 2017-02-03 15:57, Alin Năstac wrote:
> Hi Felix,
> 
> The SIGTERM ignore issue I was experiencing before is no longer
> reproducible after I apply your patch.
> 
> However, I'm concerned about a possible ignore of SIGTERM signal
> received during a ubus_complete_request() call. If ctx->stack_depth is
> 0, any such signal received between prev_uloop_initialization and the
> reset of ulopp_cancelling to false will be ignored. Is this
> "uloop_cancelling = false" really necessary?
> 
> BTW, I think the reset of uloop_status and uloop_cancelled should be
> executed before uloop_setup_signals() like so:
>         if (!recursive_calls++) {
>                uloop_status = 0;
>                uloop_cancelled = false;
>                uloop_setup_signals(true);
>         }
I was worried about the corner case of performing a ubus request after
uloop_run has already completed.
I guess one way this could be addressed is by setting uloop_cancelled =
false at the end of uloop_run().

- Felix
Alin Năstac Feb. 3, 2017, 3:41 p.m. UTC | #3
On Fri, Feb 3, 2017 at 4:19 PM, Felix Fietkau <nbd@nbd.name> wrote:
> On 2017-02-03 15:57, Alin Năstac wrote:
>> Hi Felix,
>>
>> The SIGTERM ignore issue I was experiencing before is no longer
>> reproducible after I apply your patch.
>>
>> However, I'm concerned about a possible ignore of SIGTERM signal
>> received during a ubus_complete_request() call. If ctx->stack_depth is
>> 0, any such signal received between prev_uloop_initialization and the
>> reset of ulopp_cancelling to false will be ignored. Is this
>> "uloop_cancelling = false" really necessary?
>>
>> BTW, I think the reset of uloop_status and uloop_cancelled should be
>> executed before uloop_setup_signals() like so:
>>         if (!recursive_calls++) {
>>                uloop_status = 0;
>>                uloop_cancelled = false;
>>                uloop_setup_signals(true);
>>         }
> I was worried about the corner case of performing a ubus request after
> uloop_run has already completed.
> I guess one way this could be addressed is by setting uloop_cancelled =
> false at the end of uloop_run().

How about adding this uloop function:
static int recursive_calls = 0; /* moved from uloop_run() context */
int uloop_cancelling()
{
    return recursive_calls > 0 && uloop_cancelled;
}

This function will return true only when uloop_run() is still running,
so you will no longer need to touch uloop_cancelled state at all. This
way you could reduce the scope of uloop_cancelled to uloop.c too.
Felix Fietkau Feb. 3, 2017, 5:58 p.m. UTC | #4
On 2017-02-03 16:41, Alin Năstac wrote:
> On Fri, Feb 3, 2017 at 4:19 PM, Felix Fietkau <nbd@nbd.name> wrote:
>> On 2017-02-03 15:57, Alin Năstac wrote:
>>> Hi Felix,
>>>
>>> The SIGTERM ignore issue I was experiencing before is no longer
>>> reproducible after I apply your patch.
>>>
>>> However, I'm concerned about a possible ignore of SIGTERM signal
>>> received during a ubus_complete_request() call. If ctx->stack_depth is
>>> 0, any such signal received between prev_uloop_initialization and the
>>> reset of ulopp_cancelling to false will be ignored. Is this
>>> "uloop_cancelling = false" really necessary?
>>>
>>> BTW, I think the reset of uloop_status and uloop_cancelled should be
>>> executed before uloop_setup_signals() like so:
>>>         if (!recursive_calls++) {
>>>                uloop_status = 0;
>>>                uloop_cancelled = false;
>>>                uloop_setup_signals(true);
>>>         }
>> I was worried about the corner case of performing a ubus request after
>> uloop_run has already completed.
>> I guess one way this could be addressed is by setting uloop_cancelled =
>> false at the end of uloop_run().
> 
> How about adding this uloop function:
> static int recursive_calls = 0; /* moved from uloop_run() context */
> int uloop_cancelling()
> {
>     return recursive_calls > 0 && uloop_cancelled;
> }
> 
> This function will return true only when uloop_run() is still running,
> so you will no longer need to touch uloop_cancelled state at all. This
> way you could reduce the scope of uloop_cancelled to uloop.c too.
Implemented that in libubox.git and ubus.git. Will push to LEDE master soon.

Thanks,

- Felix
diff mbox

Patch

diff --git a/libubus-io.c b/libubus-io.c
index 1075c65..1343bb2 100644
--- a/libubus-io.c
+++ b/libubus-io.c
@@ -154,9 +154,10 @@  int __hidden ubus_send_msg(struct ubus_context *ctx, uint32_t seq,
 	return ret;
 }
 
-static int recv_retry(int fd, struct iovec *iov, bool wait, int *recv_fd)
+static int recv_retry(struct ubus_context *ctx, struct iovec *iov, bool wait, int *recv_fd)
 {
 	int bytes, total = 0;
+	int fd = ctx->sock.fd;
 	static struct {
 		struct cmsghdr h;
 		int fd;
@@ -191,7 +192,7 @@  static int recv_retry(int fd, struct iovec *iov, bool wait, int *recv_fd)
 
 		if (bytes < 0) {
 			bytes = 0;
-			if (uloop_cancelled)
+			if (uloop_cancelled || ctx->cancel_poll)
 				return 0;
 			if (errno == EINTR)
 				continue;
@@ -274,7 +275,7 @@  static bool get_next_msg(struct ubus_context *ctx, int *recv_fd)
 	int r;
 
 	/* receive header + start attribute */
-	r = recv_retry(ctx->sock.fd, &iov, false, recv_fd);
+	r = recv_retry(ctx, &iov, false, recv_fd);
 	if (r <= 0) {
 		if (r < 0)
 			ctx->sock.eof = true;
@@ -298,7 +299,7 @@  static bool get_next_msg(struct ubus_context *ctx, int *recv_fd)
 	iov.iov_base = (char *)ctx->msgbuf.data + sizeof(hdrbuf.data);
 	iov.iov_len = blob_len(ctx->msgbuf.data);
 	if (iov.iov_len > 0 &&
-	    recv_retry(ctx->sock.fd, &iov, true, NULL) <= 0)
+	    recv_retry(ctx, &iov, true, NULL) <= 0)
 		return false;
 
 	return true;
@@ -311,7 +312,7 @@  void __hidden ubus_handle_data(struct uloop_fd *u, unsigned int events)
 
 	while (get_next_msg(ctx, &recv_fd)) {
 		ubus_process_msg(ctx, &ctx->msgbuf, recv_fd);
-		if (uloop_cancelled)
+		if (uloop_cancelled || ctx->cancel_poll)
 			break;
 	}
 
@@ -326,6 +327,7 @@  void __hidden ubus_poll_data(struct ubus_context *ctx, int timeout)
 		.events = POLLIN | POLLERR,
 	};
 
+	ctx->cancel_poll = false;
 	poll(&pfd, 1, timeout ? timeout : -1);
 	ubus_handle_data(&ctx->sock, ULOOP_READ);
 }
diff --git a/libubus-req.c b/libubus-req.c
index db5061c..305e813 100644
--- a/libubus-req.c
+++ b/libubus-req.c
@@ -122,7 +122,7 @@  static void ubus_sync_req_cb(struct ubus_request *req, int ret)
 {
 	req->status_msg = true;
 	req->status_code = ret;
-	uloop_end();
+	req->ctx->cancel_poll = true;
 }
 
 static int64_t get_time_msec(void)
@@ -142,6 +142,7 @@  int ubus_complete_request(struct ubus_context *ctx, struct ubus_request *req,
 	ubus_complete_handler_t complete_cb = req->complete_cb;
 	int status = UBUS_STATUS_NO_DATA;
 	int64_t timeout = 0, time_end = 0;
+	bool prev_uloop_cancelled = uloop_cancelled;
 
 	if (req_timeout)
 		time_end = get_time_msec() + req_timeout;
@@ -149,29 +150,32 @@  int ubus_complete_request(struct ubus_context *ctx, struct ubus_request *req,
 	ubus_complete_request_async(ctx, req);
 	req->complete_cb = ubus_sync_req_cb;
 
+	if (!ctx->stack_depth)
+		uloop_cancelled = false;
+
 	ctx->stack_depth++;
 	while (!req->status_msg) {
-		bool cancelled = uloop_cancelled;
-
-		uloop_cancelled = false;
 		if (req_timeout) {
 			timeout = time_end - get_time_msec();
 			if (timeout <= 0) {
 				ubus_set_req_status(req, UBUS_STATUS_TIMEOUT);
-				uloop_cancelled = cancelled;
 				break;
 			}
 		}
+
 		ubus_poll_data(ctx, (unsigned int) timeout);
 
-		uloop_cancelled = cancelled;
 		if (ctx->sock.eof) {
 			ubus_set_req_status(req, UBUS_STATUS_CONNECTION_FAILED);
+			ctx->cancel_poll = true;
 			break;
 		}
 	}
+
 	ctx->stack_depth--;
 	if (ctx->stack_depth)
+		ctx->cancel_poll = true;
+	else if (prev_uloop_cancelled)
 		uloop_cancelled = true;
 
 	if (req->status_msg)
diff --git a/libubus.h b/libubus.h
index ea53272..4e45cb6 100644
--- a/libubus.h
+++ b/libubus.h
@@ -155,6 +155,7 @@  struct ubus_context {
 
 	uint32_t local_id;
 	uint16_t request_seq;
+	bool cancel_poll;
 	int stack_depth;
 
 	void (*connection_lost)(struct ubus_context *ctx);