diff mbox series

[net,v3,1/7] net/rds: Give fr_state a chance to transition to FRMR_IS_FREE

Message ID 491db13c-3843-b57a-c9c5-9c7e7c18381a@oracle.com
State Accepted
Delegated to: David Miller
Headers show
Series net/rds: RDMA fixes | expand

Commit Message

Gerd Rausch July 16, 2019, 10:28 p.m. UTC
In the context of FRMR (ib_frmr.c):

Memory regions make it onto the "clean_list" via "rds_ib_flush_mr_pool",
after the memory region has been posted for invalidation via
"rds_ib_post_inv".

At that point in time, "fr_state" may still be in state "FRMR_IS_INUSE",
since the only place where "fr_state" transitions to "FRMR_IS_FREE"
is in "rds_ib_mr_cqe_handler", which is triggered by a tasklet.

So in case we notice that "fr_state != FRMR_IS_FREE" (see below),
we wait for "fr_inv_done" to trigger with a maximum of 10msec.
Then we check again, and only put the memory region onto the drop_list
(via "rds_ib_free_frmr") in case the situation remains unchanged.

This avoids the problem of memory-regions bouncing between "clean_list"
and "drop_list" before they even have a chance to be properly invalidated.

Signed-off-by: Gerd Rausch <gerd.rausch@oracle.com>
---
 net/rds/ib_frmr.c | 27 ++++++++++++++++++++++++++-
 net/rds/ib_mr.h   |  1 +
 2 files changed, 27 insertions(+), 1 deletion(-)

Comments

Santosh Shilimkar July 17, 2019, 12:26 a.m. UTC | #1
On 7/16/19 3:28 PM, Gerd Rausch wrote:
> In the context of FRMR (ib_frmr.c):
> 
> Memory regions make it onto the "clean_list" via "rds_ib_flush_mr_pool",
> after the memory region has been posted for invalidation via
> "rds_ib_post_inv".
> 
> At that point in time, "fr_state" may still be in state "FRMR_IS_INUSE",
> since the only place where "fr_state" transitions to "FRMR_IS_FREE"
> is in "rds_ib_mr_cqe_handler", which is triggered by a tasklet.
> 
> So in case we notice that "fr_state != FRMR_IS_FREE" (see below),
> we wait for "fr_inv_done" to trigger with a maximum of 10msec.
> Then we check again, and only put the memory region onto the drop_list
> (via "rds_ib_free_frmr") in case the situation remains unchanged.
> 
> This avoids the problem of memory-regions bouncing between "clean_list"
> and "drop_list" before they even have a chance to be properly invalidated.
> 
> Signed-off-by: Gerd Rausch <gerd.rausch@oracle.com>
> ---
Thanks for the update.

Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
diff mbox series

Patch

diff --git a/net/rds/ib_frmr.c b/net/rds/ib_frmr.c
index 32ae26ed58a0..6038138d6e38 100644
--- a/net/rds/ib_frmr.c
+++ b/net/rds/ib_frmr.c
@@ -75,6 +75,7 @@  static struct rds_ib_mr *rds_ib_alloc_frmr(struct rds_ib_device *rds_ibdev,
 		pool->max_items_soft = pool->max_items;
 
 	frmr->fr_state = FRMR_IS_FREE;
+	init_waitqueue_head(&frmr->fr_inv_done);
 	return ibmr;
 
 out_no_cigar:
@@ -285,6 +286,7 @@  void rds_ib_mr_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc)
 	if (frmr->fr_inv) {
 		frmr->fr_state = FRMR_IS_FREE;
 		frmr->fr_inv = false;
+		wake_up(&frmr->fr_inv_done);
 	}
 
 	atomic_inc(&ic->i_fastreg_wrs);
@@ -345,8 +347,31 @@  struct rds_ib_mr *rds_ib_reg_frmr(struct rds_ib_device *rds_ibdev,
 	}
 
 	do {
-		if (ibmr)
+		if (ibmr) {
+			/* Memory regions make it onto the "clean_list" via
+			 * "rds_ib_flush_mr_pool", after the memory region has
+			 * been posted for invalidation via "rds_ib_post_inv".
+			 *
+			 * At that point in time, "fr_state" may still be
+			 * in state "FRMR_IS_INUSE", since the only place where
+			 * "fr_state" transitions to "FRMR_IS_FREE" is in
+			 * is in "rds_ib_mr_cqe_handler", which is
+			 * triggered by a tasklet.
+			 *
+			 * So we wait for "fr_inv_done" to trigger
+			 * and only put memory regions onto the drop_list
+			 * that failed (i.e. not marked "FRMR_IS_FREE").
+			 *
+			 * This avoids the problem of memory-regions bouncing
+			 * between "clean_list" and "drop_list" before they
+			 * even have a chance to be properly invalidated.
+			 */
+			frmr = &ibmr->u.frmr;
+			wait_event(frmr->fr_inv_done, frmr->fr_state != FRMR_IS_INUSE);
+			if (frmr->fr_state == FRMR_IS_FREE)
+				break;
 			rds_ib_free_frmr(ibmr, true);
+		}
 		ibmr = rds_ib_alloc_frmr(rds_ibdev, nents);
 		if (IS_ERR(ibmr))
 			return ibmr;
diff --git a/net/rds/ib_mr.h b/net/rds/ib_mr.h
index 5da12c248431..42daccb7b5eb 100644
--- a/net/rds/ib_mr.h
+++ b/net/rds/ib_mr.h
@@ -57,6 +57,7 @@  struct rds_ib_frmr {
 	struct ib_mr		*mr;
 	enum rds_ib_fr_state	fr_state;
 	bool			fr_inv;
+	wait_queue_head_t	fr_inv_done;
 	struct ib_send_wr	fr_wr;
 	unsigned int		dma_npages;
 	unsigned int		sg_byte_len;