diff mbox series

[4/6] network_ipc async_thread: fix hang on failed update

Message ID 20220422235944.2808227-4-dominique.martinet@atmark-techno.com
State Changes Requested
Headers show
Series [1/6] sigchld_handler: report child exit status correctly | expand

Commit Message

Dominique MARTINET April 22, 2022, 11:59 p.m. UTC
if a update is large and fails, it's likely write_image would fail.
Old code would just exit there, quitting swupdate but a recent cleanup
made this just exit async_thread without calling the end function, so
the caller would be left hanging waiting for an update taht will never
finish.

This can easily be reproduced with a script that exits immediately
with a failure code followed by another 'large' file,
with swupdate -i or swupdate_client

Fixes: 626d83f8819d ("IPC: do not call exit")
Signed-off-by: Dominique Martinet <dominique.martinet@atmark-techno.com>
---

I can provide a failing sw-description if required, but basically a cpio
with
 - a small script that fails immediately
 - whatever image that would be ignored
fails reliably for me

 ipc/network_ipc-if.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

Comments

Dominique MARTINET May 6, 2022, 12:28 a.m. UTC | #1
Dominique Martinet wrote on Sat, Apr 23, 2022 at 08:59:42AM +0900:
> if a update is large and fails, it's likely write_image would fail.
> Old code would just exit there, quitting swupdate but a recent cleanup
> made this just exit async_thread without calling the end function, so
> the caller would be left hanging waiting for an update taht will never

typo here: tath -> that
Stefano Babic May 6, 2022, 7:04 a.m. UTC | #2
On 23.04.22 01:59, Dominique Martinet wrote:
> if a update is large and fails, it's likely write_image would fail.
> Old code would just exit there, quitting swupdate but a recent cleanup
> made this just exit async_thread without calling the end function, so
> the caller would be left hanging waiting for an update taht will never
> finish.
> 
> This can easily be reproduced with a script that exits immediately
> with a failure code followed by another 'large' file,
> with swupdate -i or swupdate_client
> 
> Fixes: 626d83f8819d ("IPC: do not call exit")
> Signed-off-by: Dominique Martinet <dominique.martinet@atmark-techno.com>
> ---
> 
> I can provide a failing sw-description if required, but basically a cpio
> with
>   - a small script that fails immediately
>   - whatever image that would be ignored
> fails reliably for me
> 
>   ipc/network_ipc-if.c | 17 +++++++++++------
>   1 file changed, 11 insertions(+), 6 deletions(-)
> 
> diff --git a/ipc/network_ipc-if.c b/ipc/network_ipc-if.c
> index c8a6cd02c72f..bf946648edcf 100644
> --- a/ipc/network_ipc-if.c
> +++ b/ipc/network_ipc-if.c
> @@ -11,6 +11,7 @@
>   #include <errno.h>
>   #include <signal.h>
>   #include <pthread.h>
> +#include <inttypes.h>
>   #include "network_ipc.h"
>   
>   static pthread_t async_thread_id;
> @@ -44,7 +45,8 @@ static void *swupdate_async_thread(void *data)
>   
>   	if (pthread_sigmask(SIG_BLOCK, &sigpipe_mask, &saved_mask) == -1) {
>   		perror("pthread_sigmask");
> -		pthread_exit((void *)-1);
> +		msg.data.status.last_result = FAILURE;
> +		goto out;
>   	}
>   	/* Start writing the image until end */
>   
> @@ -56,7 +58,8 @@ static void *swupdate_async_thread(void *data)
>   		if (size) {
>   			if (swupdate_image_write(pbuf, size) != size) {
>   				perror("swupdate_image_write failed");
> -				pthread_exit((void *)-1);
> +				msg.data.status.last_result = FAILURE;
> +				goto out;
>   			}
>   		}
>   	} while(size > 0);
> @@ -69,20 +72,22 @@ static void *swupdate_async_thread(void *data)
>   
>   	swupdate_result = ipc_wait_for_complete(rq->get);
>   
> -	handle = 0;
> -
>   	if (sigtimedwait(&sigpipe_mask, 0, &zerotime) == -1) {
>   		// currently ignored
>   	}
>   
>   	if (pthread_sigmask(SIG_SETMASK, &saved_mask, 0) == -1) {
> -		  perror("pthread_sigmask");
> +		perror("pthread_sigmask");
> +		msg.data.status.last_result = FAILURE;
> +		goto out;
>   	}
>   
> +out:
> +	handle = 0;
>   	if (rq->end)
>   		rq->end((RECOVERY_STATUS)swupdate_result);
>   
> -	pthread_exit(NULL);
> +	pthread_exit((void*)(intptr_t)(msg.data.status.last_result == SUCCESS));
>   }
>   
>   /*

Reviewed-by: Stefano babic <sbabic@denx.de>

Best regards,
Stefano Babic
Stefano Babic May 6, 2022, 7:05 a.m. UTC | #3
On 06.05.22 02:28, Dominique Martinet wrote:
> Dominique Martinet wrote on Sat, Apr 23, 2022 at 08:59:42AM +0900:
>> if a update is large and fails, it's likely write_image would fail.
>> Old code would just exit there, quitting swupdate but a recent cleanup
>> made this just exit async_thread without calling the end function, so
>> the caller would be left hanging waiting for an update taht will never
> 
> typo here: tath -> that
> 
Thanks, I fix myselfe during merge.

Best regards,
Stefano
Stefano Babic May 6, 2022, 7:17 a.m. UTC | #4
On 06.05.22 09:04, Stefano Babic wrote:
> On 23.04.22 01:59, Dominique Martinet wrote:
>> if a update is large and fails, it's likely write_image would fail.
>> Old code would just exit there, quitting swupdate but a recent cleanup
>> made this just exit async_thread without calling the end function, so
>> the caller would be left hanging waiting for an update taht will never
>> finish.
>>
>> This can easily be reproduced with a script that exits immediately
>> with a failure code followed by another 'large' file,
>> with swupdate -i or swupdate_client
>>
>> Fixes: 626d83f8819d ("IPC: do not call exit")
>> Signed-off-by: Dominique Martinet <dominique.martinet@atmark-techno.com>
>> ---
>>
>> I can provide a failing sw-description if required, but basically a cpio
>> with
>>   - a small script that fails immediately
>>   - whatever image that would be ignored
>> fails reliably for me
>>
>>   ipc/network_ipc-if.c | 17 +++++++++++------
>>   1 file changed, 11 insertions(+), 6 deletions(-)
>>
>> diff --git a/ipc/network_ipc-if.c b/ipc/network_ipc-if.c
>> index c8a6cd02c72f..bf946648edcf 100644
>> --- a/ipc/network_ipc-if.c
>> +++ b/ipc/network_ipc-if.c
>> @@ -11,6 +11,7 @@
>>   #include <errno.h>
>>   #include <signal.h>
>>   #include <pthread.h>
>> +#include <inttypes.h>
>>   #include "network_ipc.h"
>>   static pthread_t async_thread_id;
>> @@ -44,7 +45,8 @@ static void *swupdate_async_thread(void *data)
>>       if (pthread_sigmask(SIG_BLOCK, &sigpipe_mask, &saved_mask) == -1) {
>>           perror("pthread_sigmask");
>> -        pthread_exit((void *)-1);
>> +        msg.data.status.last_result = FAILURE;

Something wrong here - where is definded msg ? I get compiler errors:

  CC      ipc/network_ipc-if.o
ipc/network_ipc-if.c: In function ‘swupdate_async_thread’:
ipc/network_ipc-if.c:48:3: error: ‘msg’ undeclared (first use in this 
function)
    48 |   msg.data.status.last_result = FAILURE;


See https://source.denx.de/swupdate/swupdate/-/jobs/431802

Best regards,
Stefano

>> +        goto out;
>>       }
>>       /* Start writing the image until end */
>> @@ -56,7 +58,8 @@ static void *swupdate_async_thread(void *data)
>>           if (size) {
>>               if (swupdate_image_write(pbuf, size) != size) {
>>                   perror("swupdate_image_write failed");
>> -                pthread_exit((void *)-1);
>> +                msg.data.status.last_result = FAILURE;
>> +                goto out;
>>               }
>>           }
>>       } while(size > 0);
>> @@ -69,20 +72,22 @@ static void *swupdate_async_thread(void *data)
>>       swupdate_result = ipc_wait_for_complete(rq->get);
>> -    handle = 0;
>> -
>>       if (sigtimedwait(&sigpipe_mask, 0, &zerotime) == -1) {
>>           // currently ignored
>>       }
>>       if (pthread_sigmask(SIG_SETMASK, &saved_mask, 0) == -1) {
>> -          perror("pthread_sigmask");
>> +        perror("pthread_sigmask");
>> +        msg.data.status.last_result = FAILURE;
>> +        goto out;
>>       }
>> +out:
>> +    handle = 0;
>>       if (rq->end)
>>           rq->end((RECOVERY_STATUS)swupdate_result);
>> -    pthread_exit(NULL);
>> +    pthread_exit((void*)(intptr_t)(msg.data.status.last_result == 
>> SUCCESS));
>>   }
>>   /*
> 
> Reviewed-by: Stefano babic <sbabic@denx.de>
> 
> Best regards,
> Stefano Babic
> 

However,
Dominique MARTINET May 6, 2022, 7:33 a.m. UTC | #5
Stefano Babic wrote on Fri, May 06, 2022 at 09:17:08AM +0200:
> > > @@ -44,7 +45,8 @@ static void *swupdate_async_thread(void *data)
> > >       if (pthread_sigmask(SIG_BLOCK, &sigpipe_mask, &saved_mask) == -1) {
> > >           perror("pthread_sigmask");
> > > -        pthread_exit((void *)-1);
> > > +        msg.data.status.last_result = FAILURE;
> 
> Something wrong here - where is definded msg ? I get compiler errors:
> 
>  CC      ipc/network_ipc-if.o
> ipc/network_ipc-if.c: In function ‘swupdate_async_thread’:
> ipc/network_ipc-if.c:48:3: error: ‘msg’ undeclared (first use in this
> function)
>    48 |   msg.data.status.last_result = FAILURE;
> 
> 
> See https://source.denx.de/swupdate/swupdate/-/jobs/431802

Sorry, this came from a failed rebase... msg.data.status is leftover
from the progress ipc patch that I had removed and fixed but I sent the
wrong patch.

Sending a v2 now, this should be updating 'swupdate_result' which can be
compared to SUCCESS at return time.
Stefano Babic May 6, 2022, 7:35 a.m. UTC | #6
On 06.05.22 09:33, Dominique Martinet wrote:
> Stefano Babic wrote on Fri, May 06, 2022 at 09:17:08AM +0200:
>>>> @@ -44,7 +45,8 @@ static void *swupdate_async_thread(void *data)
>>>>        if (pthread_sigmask(SIG_BLOCK, &sigpipe_mask, &saved_mask) == -1) {
>>>>            perror("pthread_sigmask");
>>>> -        pthread_exit((void *)-1);
>>>> +        msg.data.status.last_result = FAILURE;
>>
>> Something wrong here - where is definded msg ? I get compiler errors:
>>
>>   CC      ipc/network_ipc-if.o
>> ipc/network_ipc-if.c: In function ‘swupdate_async_thread’:
>> ipc/network_ipc-if.c:48:3: error: ‘msg’ undeclared (first use in this
>> function)
>>     48 |   msg.data.status.last_result = FAILURE;
>>
>>
>> See https://source.denx.de/swupdate/swupdate/-/jobs/431802
> 
> Sorry, this came from a failed rebase... msg.data.status is leftover
> from the progress ipc patch that I had removed and fixed but I sent the
> wrong patch.
> 
> Sending a v2 now, this should be updating 'swupdate_result' which can be
> compared to SUCCESS at return time.
> 

Ok, thanks

Best regards,
Stefano Babic
diff mbox series

Patch

diff --git a/ipc/network_ipc-if.c b/ipc/network_ipc-if.c
index c8a6cd02c72f..bf946648edcf 100644
--- a/ipc/network_ipc-if.c
+++ b/ipc/network_ipc-if.c
@@ -11,6 +11,7 @@ 
 #include <errno.h>
 #include <signal.h>
 #include <pthread.h>
+#include <inttypes.h>
 #include "network_ipc.h"
 
 static pthread_t async_thread_id;
@@ -44,7 +45,8 @@  static void *swupdate_async_thread(void *data)
 
 	if (pthread_sigmask(SIG_BLOCK, &sigpipe_mask, &saved_mask) == -1) {
 		perror("pthread_sigmask");
-		pthread_exit((void *)-1);
+		msg.data.status.last_result = FAILURE;
+		goto out;
 	}
 	/* Start writing the image until end */
 
@@ -56,7 +58,8 @@  static void *swupdate_async_thread(void *data)
 		if (size) {
 			if (swupdate_image_write(pbuf, size) != size) {
 				perror("swupdate_image_write failed");
-				pthread_exit((void *)-1);
+				msg.data.status.last_result = FAILURE;
+				goto out;
 			}
 		}
 	} while(size > 0);
@@ -69,20 +72,22 @@  static void *swupdate_async_thread(void *data)
 
 	swupdate_result = ipc_wait_for_complete(rq->get);
 
-	handle = 0;
-
 	if (sigtimedwait(&sigpipe_mask, 0, &zerotime) == -1) {
 		// currently ignored
 	}
 
 	if (pthread_sigmask(SIG_SETMASK, &saved_mask, 0) == -1) {
-		  perror("pthread_sigmask");
+		perror("pthread_sigmask");
+		msg.data.status.last_result = FAILURE;
+		goto out;
 	}
 
+out:
+	handle = 0;
 	if (rq->end)
 		rq->end((RECOVERY_STATUS)swupdate_result);
 
-	pthread_exit(NULL);
+	pthread_exit((void*)(intptr_t)(msg.data.status.last_result == SUCCESS));
 }
 
 /*