[v2] read_all: give more time to wait children finish read action

Message ID 20180411094734.10962-1-liwang@redhat.com
State Superseded
Headers show
Series
  • [v2] read_all: give more time to wait children finish read action
Related show

Commit Message

Li Wang April 11, 2018, 9:47 a.m.
1. We get the following worker stalled messges in test:
 # ./read_all -d /sys -q -r 10
   tst_test.c:987: INFO: Timeout per run is 0h 05m 00s
   read_all.c:280: BROK: Worker 26075 is stalled
   read_all.c:280: WARN: Worker 26075 is stalled
   read_all.c:280: WARN: Worker 26079 is stalled
   read_all.c:280: WARN: Worker 26087 is stalled

The reason is that some children are still working on the read I/O but
parent trys to stopping them after visit_dir() immediately. Although
the stop_attemps is 65535, it still sometimes fails.

Instead, we use an exponential backoff way to loop the stop operation
in limited seconds.

2. The sched_work() push action in a infinite loop, here also let it
trys in limited time.

Signed-off-by: Li Wang <liwang@redhat.com>
---
 testcases/kernel/fs/read_all/read_all.c | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

Comments

Richard Palethorpe April 11, 2018, 11:19 a.m. | #1
Hello Li,

Li Wang writes:

> 1. We get the following worker stalled messges in test:
>  # ./read_all -d /sys -q -r 10
>    tst_test.c:987: INFO: Timeout per run is 0h 05m 00s
>    read_all.c:280: BROK: Worker 26075 is stalled
>    read_all.c:280: WARN: Worker 26075 is stalled
>    read_all.c:280: WARN: Worker 26079 is stalled
>    read_all.c:280: WARN: Worker 26087 is stalled
>
> The reason is that some children are still working on the read I/O but
> parent trys to stopping them after visit_dir() immediately. Although
> the stop_attemps is 65535, it still sometimes fails.
>
> Instead, we use an exponential backoff way to loop the stop operation
> in limited seconds.
>
> 2. The sched_work() push action in a infinite loop, here also let it
> trys in limited time.
>
> Signed-off-by: Li Wang <liwang@redhat.com>
> ---
>  testcases/kernel/fs/read_all/read_all.c | 24 ++++++++++++++++--------
>  1 file changed, 16 insertions(+), 8 deletions(-)
>
> diff --git a/testcases/kernel/fs/read_all/read_all.c b/testcases/kernel/fs/read_all/read_all.c
> index b7ed540..a9f9707 100644
> --- a/testcases/kernel/fs/read_all/read_all.c
> +++ b/testcases/kernel/fs/read_all/read_all.c
> @@ -57,6 +57,8 @@
>  #define BUFFER_SIZE 1024
>  #define MAX_PATH 4096
>  #define MAX_DISPLAY 40
> +#define MICROSECOND 1

Not necessary.

> +#define SECOND MICROSECOND * 1000000
>  
>  struct queue {
>  	sem_t sem;
> @@ -265,20 +267,21 @@ static void spawn_workers(void)
>  static void stop_workers(void)
>  {
>  	const char stop_code[1] = { '\0' };
> -	int i, stop_attempts;
> +	int i, delay = 1;
>  
>  	if (!workers)
>  		return;
>  
>  	for (i = 0; i < worker_count; i++) {
> -		stop_attempts = 0xffff;
>  		if (workers[i].q) {

Maybe change this to:
if (!workers[i].q)
   continue;

To avoid a level of indentation.

>  			while (!queue_push(workers[i].q, stop_code)) {
> -				if (--stop_attempts < 0) {
> +				if (delay < SECOND) {
> +					usleep(delay);
> +					delay *= 2;
> +				} else {
>  					tst_brk(TBROK,
>  						"Worker %d is stalled",
>  						workers[i].pid);
> -					break;
>  				}
>  			}
>  		}
> @@ -295,7 +298,7 @@ static void stop_workers(void)
>  static void sched_work(const char *path)
>  {
>  	static int cur;
> -	int push_attempts = 0, pushed;
> +	int push_attempts = 0, pushed, delay = 1;
>  
>  	while (1) {
>  		pushed = queue_push(workers[cur].q, path);
> @@ -306,9 +309,14 @@ static void sched_work(const char *path)
>  		if (pushed)
>  			break;
>  
> -		if (++push_attempts > worker_count) {
> -			usleep(100);
> -			push_attempts = 0;
> +		if (delay < SECOND) {
> +			push_attempts++;
> +			usleep(delay);
> +			delay *= 2;
> +		} else {
> +			tst_brk(TBROK,
> +				"Attempts %d times but still failed to push %s",
                                 ^ Attempted

> +				push_attempts, path);
>  		}
>  	}
>  }

Maybe you could put the "if (delaly < SECOND) ..." into a function?

Otherwise this looks good to me. There are some other things I want to
change on this test, but we can leave those for another patch.
Li Wang April 12, 2018, 3:01 a.m. | #2
Richard Palethorpe <rpalethorpe@suse.de> wrote:

>
> >  #define MAX_DISPLAY 40
> > +#define MICROSECOND 1
>
> Not necessary.
>
​
Ok.
​


> >
> >       for (i = 0; i < worker_count; i++) {
> > -             stop_attempts = 0xffff;
> >               if (workers[i].q) {
>
> Maybe change this to:
> if (!workers[i].q)
>    continue;
>
> To avoid a level of indentation.
>

​Sounds good.
​

> +                     tst_brk(TBROK,
> > +                             "Attempts %d times but still failed to
> push %s",
>                                  ^ Attempted
>

​Good catch.
​


>
> > +                             push_attempts, path);
> >               }
> >       }
> >  }
>
> Maybe you could put the "if (delaly < SECOND) ..." into a function?
>

​Firstly, I was thinking to take use of my new macros, but
consider that have not been reviewed. So I tend to keep
it as original, then replace them entirely after ltp gets
a generic loop functionality in library.



>
> Otherwise this looks good to me. There are some other things I want to
> change on this test, but we can leave those for another patch.
>

​Thanks, Patch v3 is coming...
​

Patch

diff --git a/testcases/kernel/fs/read_all/read_all.c b/testcases/kernel/fs/read_all/read_all.c
index b7ed540..a9f9707 100644
--- a/testcases/kernel/fs/read_all/read_all.c
+++ b/testcases/kernel/fs/read_all/read_all.c
@@ -57,6 +57,8 @@ 
 #define BUFFER_SIZE 1024
 #define MAX_PATH 4096
 #define MAX_DISPLAY 40
+#define MICROSECOND 1
+#define SECOND MICROSECOND * 1000000
 
 struct queue {
 	sem_t sem;
@@ -265,20 +267,21 @@  static void spawn_workers(void)
 static void stop_workers(void)
 {
 	const char stop_code[1] = { '\0' };
-	int i, stop_attempts;
+	int i, delay = 1;
 
 	if (!workers)
 		return;
 
 	for (i = 0; i < worker_count; i++) {
-		stop_attempts = 0xffff;
 		if (workers[i].q) {
 			while (!queue_push(workers[i].q, stop_code)) {
-				if (--stop_attempts < 0) {
+				if (delay < SECOND) {
+					usleep(delay);
+					delay *= 2;
+				} else {
 					tst_brk(TBROK,
 						"Worker %d is stalled",
 						workers[i].pid);
-					break;
 				}
 			}
 		}
@@ -295,7 +298,7 @@  static void stop_workers(void)
 static void sched_work(const char *path)
 {
 	static int cur;
-	int push_attempts = 0, pushed;
+	int push_attempts = 0, pushed, delay = 1;
 
 	while (1) {
 		pushed = queue_push(workers[cur].q, path);
@@ -306,9 +309,14 @@  static void sched_work(const char *path)
 		if (pushed)
 			break;
 
-		if (++push_attempts > worker_count) {
-			usleep(100);
-			push_attempts = 0;
+		if (delay < SECOND) {
+			push_attempts++;
+			usleep(delay);
+			delay *= 2;
+		} else {
+			tst_brk(TBROK,
+				"Attempts %d times but still failed to push %s",
+				push_attempts, path);
 		}
 	}
 }