[SRU,0/1,B/gke-5.0] blk-wbt: fix performance regression in wbt scale_up/scale_down
diff mbox

Message ID 20191010174438.25435-2-khalid.elmously@canonical.com
State New
Headers show

Commit Message

Khaled Elmously Oct. 10, 2019, 5:44 p.m. UTC
From: Harshad Shirwadkar <harshadshirwadkar@gmail.com>

BugLink: https://bugs.launchpad.net/bugs/1847641

scale_up wakes up waiters after scaling up. But after scaling max, it
should not wake up more waiters as waiters will not have anything to
do. This patch fixes this by making scale_up (and also scale_down)
return when threshold is reached.

This bug causes increased fdatasync latency when fdatasync and dd
conv=sync are performed in parallel on 4.19 compared to 4.14. This
bug was introduced during refactoring of blk-wbt code.

Fixes: a79050434b45 ("blk-rq-qos: refactor out common elements of blk-wbt")
Cc: stable@vger.kernel.org
Cc: Josef Bacik <jbacik@fb.com>
Signed-off-by: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
(cherry picked from commit b84477d3ebb96294f87dc3161e53fa8fe22d9bfd linux-block)
Signed-off-by: Khalid Elmously <khalid.elmously@canonical.com>
---
 block/blk-rq-qos.c | 14 +++++++++-----
 block/blk-rq-qos.h |  4 ++--
 block/blk-wbt.c    |  6 ++++--
 3 files changed, 15 insertions(+), 9 deletions(-)

Comments

Sultan Alsawaf Oct. 10, 2019, 5:50 p.m. UTC | #1
On Thu, Oct 10, 2019 at 01:44:38PM -0400, Khalid Elmously wrote:
> From: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
> 
> BugLink: https://bugs.launchpad.net/bugs/1847641
> 
> scale_up wakes up waiters after scaling up. But after scaling max, it
> should not wake up more waiters as waiters will not have anything to
> do. This patch fixes this by making scale_up (and also scale_down)
> return when threshold is reached.
> 
> This bug causes increased fdatasync latency when fdatasync and dd
> conv=sync are performed in parallel on 4.19 compared to 4.14. This
> bug was introduced during refactoring of blk-wbt code.
> 
> Fixes: a79050434b45 ("blk-rq-qos: refactor out common elements of blk-wbt")
> Cc: stable@vger.kernel.org
> Cc: Josef Bacik <jbacik@fb.com>
> Signed-off-by: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
> Signed-off-by: Jens Axboe <axboe@kernel.dk>
> (cherry picked from commit b84477d3ebb96294f87dc3161e53fa8fe22d9bfd linux-block)
> Signed-off-by: Khalid Elmously <khalid.elmously@canonical.com>
> ---
>  block/blk-rq-qos.c | 14 +++++++++-----
>  block/blk-rq-qos.h |  4 ++--
>  block/blk-wbt.c    |  6 ++++--
>  3 files changed, 15 insertions(+), 9 deletions(-)
> 
> diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c
> index c69a1515a7ee..fc5e1ef5bad9 100644
> --- a/block/blk-rq-qos.c
> +++ b/block/blk-rq-qos.c
> @@ -140,24 +140,27 @@ bool rq_depth_calc_max_depth(struct rq_depth *rqd)
>  	return ret;
>  }
>  
> -void rq_depth_scale_up(struct rq_depth *rqd)
> +/* Returns true on success and false if scaling up wasn't possible */
> +bool rq_depth_scale_up(struct rq_depth *rqd)
>  {
>  	/*
>  	 * Hit max in previous round, stop here
>  	 */
>  	if (rqd->scaled_max)
> -		return;
> +		return false;
>  
>  	rqd->scale_step--;
>  
>  	rqd->scaled_max = rq_depth_calc_max_depth(rqd);
> +	return true;
>  }
>  
>  /*
>   * Scale rwb down. If 'hard_throttle' is set, do it quicker, since we
> - * had a latency violation.
> + * had a latency violation. Returns true on success and returns false if
> + * scaling down wasn't possible.
>   */
> -void rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle)
> +bool rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle)
>  {
>  	/*
>  	 * Stop scaling down when we've hit the limit. This also prevents
> @@ -165,7 +168,7 @@ void rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle)
>  	 * keep up.
>  	 */
>  	if (rqd->max_depth == 1)
> -		return;
> +		return false;
>  
>  	if (rqd->scale_step < 0 && hard_throttle)
>  		rqd->scale_step = 0;
> @@ -174,6 +177,7 @@ void rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle)
>  
>  	rqd->scaled_max = false;
>  	rq_depth_calc_max_depth(rqd);
> +	return true;
>  }
>  
>  struct rq_qos_wait_data {
> diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h
> index 564851889550..ddd0c9e6d53c 100644
> --- a/block/blk-rq-qos.h
> +++ b/block/blk-rq-qos.h
> @@ -124,8 +124,8 @@ void rq_qos_wait(struct rq_wait *rqw, void *private_data,
>  		 acquire_inflight_cb_t *acquire_inflight_cb,
>  		 cleanup_cb_t *cleanup_cb);
>  bool rq_wait_inc_below(struct rq_wait *rq_wait, unsigned int limit);
> -void rq_depth_scale_up(struct rq_depth *rqd);
> -void rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle);
> +bool rq_depth_scale_up(struct rq_depth *rqd);
> +bool rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle);
>  bool rq_depth_calc_max_depth(struct rq_depth *rqd);
>  
>  void __rq_qos_cleanup(struct rq_qos *rqos, struct bio *bio);
> diff --git a/block/blk-wbt.c b/block/blk-wbt.c
> index fd166fbb0f65..205e566b1ac3 100644
> --- a/block/blk-wbt.c
> +++ b/block/blk-wbt.c
> @@ -307,7 +307,8 @@ static void calc_wb_limits(struct rq_wb *rwb)
>  
>  static void scale_up(struct rq_wb *rwb)
>  {
> -	rq_depth_scale_up(&rwb->rq_depth);
> +	if (!rq_depth_scale_up(&rwb->rq_depth))
> +		return;
>  	calc_wb_limits(rwb);
>  	rwb->unknown_cnt = 0;
>  	rwb_wake_all(rwb);
> @@ -316,7 +317,8 @@ static void scale_up(struct rq_wb *rwb)
>  
>  static void scale_down(struct rq_wb *rwb, bool hard_throttle)
>  {
> -	rq_depth_scale_down(&rwb->rq_depth, hard_throttle);
> +	if (!rq_depth_scale_down(&rwb->rq_depth, hard_throttle))
> +		return;
>  	calc_wb_limits(rwb);
>  	rwb->unknown_cnt = 0;
>  	rwb_trace_step(rwb, "scale down");
> -- 
> 2.17.1
> 
> 
> -- 
> kernel-team mailing list
> kernel-team@lists.ubuntu.com
> https://lists.ubuntu.com/mailman/listinfo/kernel-team

Acked-by: Sultan Alsawaf <sultan.alsawaf@canonical.com>
Kamal Mostafa Oct. 10, 2019, 5:53 p.m. UTC | #2
LGTM.

Acked-by: Kamal Mostafa <kamal@canonical.com>

 -Kamal

On Thu, Oct 10, 2019 at 01:44:38PM -0400, Khalid Elmously wrote:
> From: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
> 
> BugLink: https://bugs.launchpad.net/bugs/1847641
> 
> scale_up wakes up waiters after scaling up. But after scaling max, it
> should not wake up more waiters as waiters will not have anything to
> do. This patch fixes this by making scale_up (and also scale_down)
> return when threshold is reached.
> 
> This bug causes increased fdatasync latency when fdatasync and dd
> conv=sync are performed in parallel on 4.19 compared to 4.14. This
> bug was introduced during refactoring of blk-wbt code.
> 
> Fixes: a79050434b45 ("blk-rq-qos: refactor out common elements of blk-wbt")
> Cc: stable@vger.kernel.org
> Cc: Josef Bacik <jbacik@fb.com>
> Signed-off-by: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
> Signed-off-by: Jens Axboe <axboe@kernel.dk>
> (cherry picked from commit b84477d3ebb96294f87dc3161e53fa8fe22d9bfd linux-block)
> Signed-off-by: Khalid Elmously <khalid.elmously@canonical.com>
> ---
>  block/blk-rq-qos.c | 14 +++++++++-----
>  block/blk-rq-qos.h |  4 ++--
>  block/blk-wbt.c    |  6 ++++--
>  3 files changed, 15 insertions(+), 9 deletions(-)
> 
> diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c
> index c69a1515a7ee..fc5e1ef5bad9 100644
> --- a/block/blk-rq-qos.c
> +++ b/block/blk-rq-qos.c
> @@ -140,24 +140,27 @@ bool rq_depth_calc_max_depth(struct rq_depth *rqd)
>  	return ret;
>  }
>  
> -void rq_depth_scale_up(struct rq_depth *rqd)
> +/* Returns true on success and false if scaling up wasn't possible */
> +bool rq_depth_scale_up(struct rq_depth *rqd)
>  {
>  	/*
>  	 * Hit max in previous round, stop here
>  	 */
>  	if (rqd->scaled_max)
> -		return;
> +		return false;
>  
>  	rqd->scale_step--;
>  
>  	rqd->scaled_max = rq_depth_calc_max_depth(rqd);
> +	return true;
>  }
>  
>  /*
>   * Scale rwb down. If 'hard_throttle' is set, do it quicker, since we
> - * had a latency violation.
> + * had a latency violation. Returns true on success and returns false if
> + * scaling down wasn't possible.
>   */
> -void rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle)
> +bool rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle)
>  {
>  	/*
>  	 * Stop scaling down when we've hit the limit. This also prevents
> @@ -165,7 +168,7 @@ void rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle)
>  	 * keep up.
>  	 */
>  	if (rqd->max_depth == 1)
> -		return;
> +		return false;
>  
>  	if (rqd->scale_step < 0 && hard_throttle)
>  		rqd->scale_step = 0;
> @@ -174,6 +177,7 @@ void rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle)
>  
>  	rqd->scaled_max = false;
>  	rq_depth_calc_max_depth(rqd);
> +	return true;
>  }
>  
>  struct rq_qos_wait_data {
> diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h
> index 564851889550..ddd0c9e6d53c 100644
> --- a/block/blk-rq-qos.h
> +++ b/block/blk-rq-qos.h
> @@ -124,8 +124,8 @@ void rq_qos_wait(struct rq_wait *rqw, void *private_data,
>  		 acquire_inflight_cb_t *acquire_inflight_cb,
>  		 cleanup_cb_t *cleanup_cb);
>  bool rq_wait_inc_below(struct rq_wait *rq_wait, unsigned int limit);
> -void rq_depth_scale_up(struct rq_depth *rqd);
> -void rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle);
> +bool rq_depth_scale_up(struct rq_depth *rqd);
> +bool rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle);
>  bool rq_depth_calc_max_depth(struct rq_depth *rqd);
>  
>  void __rq_qos_cleanup(struct rq_qos *rqos, struct bio *bio);
> diff --git a/block/blk-wbt.c b/block/blk-wbt.c
> index fd166fbb0f65..205e566b1ac3 100644
> --- a/block/blk-wbt.c
> +++ b/block/blk-wbt.c
> @@ -307,7 +307,8 @@ static void calc_wb_limits(struct rq_wb *rwb)
>  
>  static void scale_up(struct rq_wb *rwb)
>  {
> -	rq_depth_scale_up(&rwb->rq_depth);
> +	if (!rq_depth_scale_up(&rwb->rq_depth))
> +		return;
>  	calc_wb_limits(rwb);
>  	rwb->unknown_cnt = 0;
>  	rwb_wake_all(rwb);
> @@ -316,7 +317,8 @@ static void scale_up(struct rq_wb *rwb)
>  
>  static void scale_down(struct rq_wb *rwb, bool hard_throttle)
>  {
> -	rq_depth_scale_down(&rwb->rq_depth, hard_throttle);
> +	if (!rq_depth_scale_down(&rwb->rq_depth, hard_throttle))
> +		return;
>  	calc_wb_limits(rwb);
>  	rwb->unknown_cnt = 0;
>  	rwb_trace_step(rwb, "scale down");
> -- 
> 2.17.1
> 
> 
> -- 
> kernel-team mailing list
> kernel-team@lists.ubuntu.com
> https://lists.ubuntu.com/mailman/listinfo/kernel-team
Connor Kuehl Oct. 10, 2019, 5:54 p.m. UTC | #3
On 10/10/19 10:44 AM, Khalid Elmously wrote:
> From: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
> 
> BugLink: https://bugs.launchpad.net/bugs/1847641
> 
> scale_up wakes up waiters after scaling up. But after scaling max, it
> should not wake up more waiters as waiters will not have anything to
> do. This patch fixes this by making scale_up (and also scale_down)
> return when threshold is reached.
> 
> This bug causes increased fdatasync latency when fdatasync and dd
> conv=sync are performed in parallel on 4.19 compared to 4.14. This
> bug was introduced during refactoring of blk-wbt code.
> 
> Fixes: a79050434b45 ("blk-rq-qos: refactor out common elements of blk-wbt")
> Cc: stable@vger.kernel.org
> Cc: Josef Bacik <jbacik@fb.com>
> Signed-off-by: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
> Signed-off-by: Jens Axboe <axboe@kernel.dk>
> (cherry picked from commit b84477d3ebb96294f87dc3161e53fa8fe22d9bfd linux-block)
> Signed-off-by: Khalid Elmously <khalid.elmously@canonical.com>

Acked-by: Connor Kuehl <connor.kuehl@canonical.com>

> ---
>   block/blk-rq-qos.c | 14 +++++++++-----
>   block/blk-rq-qos.h |  4 ++--
>   block/blk-wbt.c    |  6 ++++--
>   3 files changed, 15 insertions(+), 9 deletions(-)
> 
> diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c
> index c69a1515a7ee..fc5e1ef5bad9 100644
> --- a/block/blk-rq-qos.c
> +++ b/block/blk-rq-qos.c
> @@ -140,24 +140,27 @@ bool rq_depth_calc_max_depth(struct rq_depth *rqd)
>   	return ret;
>   }
>   
> -void rq_depth_scale_up(struct rq_depth *rqd)
> +/* Returns true on success and false if scaling up wasn't possible */
> +bool rq_depth_scale_up(struct rq_depth *rqd)
>   {
>   	/*
>   	 * Hit max in previous round, stop here
>   	 */
>   	if (rqd->scaled_max)
> -		return;
> +		return false;
>   
>   	rqd->scale_step--;
>   
>   	rqd->scaled_max = rq_depth_calc_max_depth(rqd);
> +	return true;
>   }
>   
>   /*
>    * Scale rwb down. If 'hard_throttle' is set, do it quicker, since we
> - * had a latency violation.
> + * had a latency violation. Returns true on success and returns false if
> + * scaling down wasn't possible.
>    */
> -void rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle)
> +bool rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle)
>   {
>   	/*
>   	 * Stop scaling down when we've hit the limit. This also prevents
> @@ -165,7 +168,7 @@ void rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle)
>   	 * keep up.
>   	 */
>   	if (rqd->max_depth == 1)
> -		return;
> +		return false;
>   
>   	if (rqd->scale_step < 0 && hard_throttle)
>   		rqd->scale_step = 0;
> @@ -174,6 +177,7 @@ void rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle)
>   
>   	rqd->scaled_max = false;
>   	rq_depth_calc_max_depth(rqd);
> +	return true;
>   }
>   
>   struct rq_qos_wait_data {
> diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h
> index 564851889550..ddd0c9e6d53c 100644
> --- a/block/blk-rq-qos.h
> +++ b/block/blk-rq-qos.h
> @@ -124,8 +124,8 @@ void rq_qos_wait(struct rq_wait *rqw, void *private_data,
>   		 acquire_inflight_cb_t *acquire_inflight_cb,
>   		 cleanup_cb_t *cleanup_cb);
>   bool rq_wait_inc_below(struct rq_wait *rq_wait, unsigned int limit);
> -void rq_depth_scale_up(struct rq_depth *rqd);
> -void rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle);
> +bool rq_depth_scale_up(struct rq_depth *rqd);
> +bool rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle);
>   bool rq_depth_calc_max_depth(struct rq_depth *rqd);
>   
>   void __rq_qos_cleanup(struct rq_qos *rqos, struct bio *bio);
> diff --git a/block/blk-wbt.c b/block/blk-wbt.c
> index fd166fbb0f65..205e566b1ac3 100644
> --- a/block/blk-wbt.c
> +++ b/block/blk-wbt.c
> @@ -307,7 +307,8 @@ static void calc_wb_limits(struct rq_wb *rwb)
>   
>   static void scale_up(struct rq_wb *rwb)
>   {
> -	rq_depth_scale_up(&rwb->rq_depth);
> +	if (!rq_depth_scale_up(&rwb->rq_depth))
> +		return;
>   	calc_wb_limits(rwb);
>   	rwb->unknown_cnt = 0;
>   	rwb_wake_all(rwb);
> @@ -316,7 +317,8 @@ static void scale_up(struct rq_wb *rwb)
>   
>   static void scale_down(struct rq_wb *rwb, bool hard_throttle)
>   {
> -	rq_depth_scale_down(&rwb->rq_depth, hard_throttle);
> +	if (!rq_depth_scale_down(&rwb->rq_depth, hard_throttle))
> +		return;
>   	calc_wb_limits(rwb);
>   	rwb->unknown_cnt = 0;
>   	rwb_trace_step(rwb, "scale down");
>

Patch
diff mbox

diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c
index c69a1515a7ee..fc5e1ef5bad9 100644
--- a/block/blk-rq-qos.c
+++ b/block/blk-rq-qos.c
@@ -140,24 +140,27 @@  bool rq_depth_calc_max_depth(struct rq_depth *rqd)
 	return ret;
 }
 
-void rq_depth_scale_up(struct rq_depth *rqd)
+/* Returns true on success and false if scaling up wasn't possible */
+bool rq_depth_scale_up(struct rq_depth *rqd)
 {
 	/*
 	 * Hit max in previous round, stop here
 	 */
 	if (rqd->scaled_max)
-		return;
+		return false;
 
 	rqd->scale_step--;
 
 	rqd->scaled_max = rq_depth_calc_max_depth(rqd);
+	return true;
 }
 
 /*
  * Scale rwb down. If 'hard_throttle' is set, do it quicker, since we
- * had a latency violation.
+ * had a latency violation. Returns true on success and returns false if
+ * scaling down wasn't possible.
  */
-void rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle)
+bool rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle)
 {
 	/*
 	 * Stop scaling down when we've hit the limit. This also prevents
@@ -165,7 +168,7 @@  void rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle)
 	 * keep up.
 	 */
 	if (rqd->max_depth == 1)
-		return;
+		return false;
 
 	if (rqd->scale_step < 0 && hard_throttle)
 		rqd->scale_step = 0;
@@ -174,6 +177,7 @@  void rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle)
 
 	rqd->scaled_max = false;
 	rq_depth_calc_max_depth(rqd);
+	return true;
 }
 
 struct rq_qos_wait_data {
diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h
index 564851889550..ddd0c9e6d53c 100644
--- a/block/blk-rq-qos.h
+++ b/block/blk-rq-qos.h
@@ -124,8 +124,8 @@  void rq_qos_wait(struct rq_wait *rqw, void *private_data,
 		 acquire_inflight_cb_t *acquire_inflight_cb,
 		 cleanup_cb_t *cleanup_cb);
 bool rq_wait_inc_below(struct rq_wait *rq_wait, unsigned int limit);
-void rq_depth_scale_up(struct rq_depth *rqd);
-void rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle);
+bool rq_depth_scale_up(struct rq_depth *rqd);
+bool rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle);
 bool rq_depth_calc_max_depth(struct rq_depth *rqd);
 
 void __rq_qos_cleanup(struct rq_qos *rqos, struct bio *bio);
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index fd166fbb0f65..205e566b1ac3 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -307,7 +307,8 @@  static void calc_wb_limits(struct rq_wb *rwb)
 
 static void scale_up(struct rq_wb *rwb)
 {
-	rq_depth_scale_up(&rwb->rq_depth);
+	if (!rq_depth_scale_up(&rwb->rq_depth))
+		return;
 	calc_wb_limits(rwb);
 	rwb->unknown_cnt = 0;
 	rwb_wake_all(rwb);
@@ -316,7 +317,8 @@  static void scale_up(struct rq_wb *rwb)
 
 static void scale_down(struct rq_wb *rwb, bool hard_throttle)
 {
-	rq_depth_scale_down(&rwb->rq_depth, hard_throttle);
+	if (!rq_depth_scale_down(&rwb->rq_depth, hard_throttle))
+		return;
 	calc_wb_limits(rwb);
 	rwb->unknown_cnt = 0;
 	rwb_trace_step(rwb, "scale down");