Patchwork [V2,05/10] RDMA/cxgb4: Add DB Overflow Avoidance.

login
register
mail settings
Submitter Vipul Pandya
Date Oct. 24, 2011, 3:12 p.m.
Message ID <1319469151-20748-1-git-send-email-vipul@chelsio.com>
Download mbox | patch
Permalink /patch/121373/
State Awaiting Upstream
Delegated to: David Miller
Headers show

Comments

Vipul Pandya - Oct. 24, 2011, 3:12 p.m.
- get FULL/EMPTY/DROP events from LLD

        - on FULL event, disable normal user mode DB rings.

        - add modify_qp semantics to allow user processes to call into
        the kernel to ring doorbells without overflowing the hw db-fifo.

        Add DB Full/Empty/Drop stats.

        Mark queues when created indicating the doorbell state.

        If we're in the middle of db overflow avoidance, then newly created
        queues should start out in this mode.

        Bump the C4IW_UVERBS_ABI_VERSION to 2 so the user mode library can
        know if the driver supports the kernel mode db ringing.

Signed-off-by: Vipul Pandya <vipul@chelsio.com>
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
---
V2: Bump C4IW_UVERBS_ABI_VERSION to 2

 drivers/infiniband/hw/cxgb4/device.c   |   84 +++++++++++++++++++++++++++++--
 drivers/infiniband/hw/cxgb4/iw_cxgb4.h |   37 ++++++++++++--
 drivers/infiniband/hw/cxgb4/qp.c       |   51 +++++++++++++++++++-
 drivers/infiniband/hw/cxgb4/user.h     |    2 +-
 4 files changed, 162 insertions(+), 12 deletions(-)

Patch

diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index 8483111..9062ed9 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -44,6 +44,12 @@  MODULE_DESCRIPTION("Chelsio T4 RDMA Driver");
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_VERSION(DRV_VERSION);
 
+struct uld_ctx {
+	struct list_head entry;
+	struct cxgb4_lld_info lldi;
+	struct c4iw_dev *dev;
+};
+
 static LIST_HEAD(uld_ctx_list);
 static DEFINE_MUTEX(dev_mutex);
 
@@ -263,6 +269,9 @@  static int stats_show(struct seq_file *seq, void *v)
 	seq_printf(seq, "  OCQPMEM: %10llu %10llu %10llu\n",
 			dev->rdev.stats.ocqp.total, dev->rdev.stats.ocqp.cur,
 			dev->rdev.stats.ocqp.max);
+	seq_printf(seq, "  DB FULL: %10llu\n", dev->rdev.stats.db_full);
+	seq_printf(seq, " DB EMPTY: %10llu\n", dev->rdev.stats.db_empty);
+	seq_printf(seq, "  DB DROP: %10llu\n", dev->rdev.stats.db_drop);
 	return 0;
 }
 
@@ -283,6 +292,9 @@  static ssize_t stats_clear(struct file *file, const char __user *buf,
 	dev->rdev.stats.pbl.max = 0;
 	dev->rdev.stats.rqt.max = 0;
 	dev->rdev.stats.ocqp.max = 0;
+	dev->rdev.stats.db_full = 0;
+	dev->rdev.stats.db_empty = 0;
+	dev->rdev.stats.db_drop = 0;
 	mutex_unlock(&dev->rdev.stats.lock);
 	return count;
 }
@@ -443,12 +455,6 @@  static void c4iw_rdev_close(struct c4iw_rdev *rdev)
 	c4iw_destroy_resource(&rdev->resource);
 }
 
-struct uld_ctx {
-	struct list_head entry;
-	struct cxgb4_lld_info lldi;
-	struct c4iw_dev *dev;
-};
-
 static void c4iw_dealloc(struct uld_ctx *ctx)
 {
 	c4iw_rdev_close(&ctx->dev->rdev);
@@ -514,6 +520,7 @@  static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop)
 	idr_init(&devp->mmidr);
 	spin_lock_init(&devp->lock);
 	mutex_init(&devp->rdev.stats.lock);
+	mutex_init(&devp->db_mutex);
 
 	if (c4iw_debugfs_root) {
 		devp->debugfs_root = debugfs_create_dir(
@@ -659,11 +666,76 @@  static int c4iw_uld_state_change(void *handle, enum cxgb4_state new_state)
 	return 0;
 }
 
+static int disable_qp_db(int id, void *p, void *data)
+{
+	struct c4iw_qp *qp = p;
+
+	t4_disable_wq_db(&qp->wq);
+	return 0;
+}
+
+static void stop_queues(struct uld_ctx *ctx)
+{
+	spin_lock_irq(&ctx->dev->lock);
+	ctx->dev->db_state = FLOW_CONTROL;
+	idr_for_each(&ctx->dev->qpidr, disable_qp_db, NULL);
+	spin_unlock_irq(&ctx->dev->lock);
+}
+
+static int enable_qp_db(int id, void *p, void *data)
+{
+	struct c4iw_qp *qp = p;
+
+	t4_enable_wq_db(&qp->wq);
+	return 0;
+}
+
+static void resume_queues(struct uld_ctx *ctx)
+{
+	spin_lock_irq(&ctx->dev->lock);
+	ctx->dev->db_state = NORMAL;
+	idr_for_each(&ctx->dev->qpidr, enable_qp_db, NULL);
+	spin_unlock_irq(&ctx->dev->lock);
+}
+
+static int c4iw_uld_control(void *handle, enum cxgb4_control control, ...)
+{
+	struct uld_ctx *ctx = handle;
+
+	switch (control) {
+	case CXGB4_CONTROL_DB_FULL:
+		stop_queues(ctx);
+		mutex_lock(&ctx->dev->rdev.stats.lock);
+		ctx->dev->rdev.stats.db_full++;
+		mutex_unlock(&ctx->dev->rdev.stats.lock);
+		break;
+	case CXGB4_CONTROL_DB_EMPTY:
+		resume_queues(ctx);
+		mutex_lock(&ctx->dev->rdev.stats.lock);
+		ctx->dev->rdev.stats.db_empty++;
+		mutex_unlock(&ctx->dev->rdev.stats.lock);
+		break;
+	case CXGB4_CONTROL_DB_DROP:
+		printk(KERN_WARNING MOD "%s: Fatal DB DROP\n",
+		       pci_name(ctx->lldi.pdev));
+		mutex_lock(&ctx->dev->rdev.stats.lock);
+		ctx->dev->rdev.stats.db_drop++;
+		mutex_unlock(&ctx->dev->rdev.stats.lock);
+		break;
+	default:
+		printk(KERN_WARNING MOD "%s: unknown control cmd %u\n",
+		       pci_name(ctx->lldi.pdev), control);
+		break;
+	}
+	return 0;
+}
+
 static struct cxgb4_uld_info c4iw_uld_info = {
 	.name = DRV_NAME,
 	.add = c4iw_uld_add,
 	.rx_handler = c4iw_uld_rx_handler,
 	.state_change = c4iw_uld_state_change,
+	.control = c4iw_uld_control,
 };
 
 static int __init c4iw_init_module(void)
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index ec7c848..1924c19 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -117,6 +117,9 @@  struct c4iw_stats {
 	struct c4iw_stat pbl;
 	struct c4iw_stat rqt;
 	struct c4iw_stat ocqp;
+	u64  db_full;
+	u64  db_empty;
+	u64  db_drop;
 };
 
 struct c4iw_rdev {
@@ -192,6 +195,12 @@  static inline int c4iw_wait_for_reply(struct c4iw_rdev *rdev,
 	return wr_waitp->ret;
 }
 
+enum db_state {
+	NORMAL = 0,
+	FLOW_CONTROL = 1,
+	RECOVERY = 2
+};
+
 struct c4iw_dev {
 	struct ib_device ibdev;
 	struct c4iw_rdev rdev;
@@ -200,7 +209,9 @@  struct c4iw_dev {
 	struct idr qpidr;
 	struct idr mmidr;
 	spinlock_t lock;
+	struct mutex db_mutex;
 	struct dentry *debugfs_root;
+	enum db_state db_state;
 };
 
 static inline struct c4iw_dev *to_c4iw_dev(struct ib_device *ibdev)
@@ -228,8 +239,8 @@  static inline struct c4iw_mr *get_mhp(struct c4iw_dev *rhp, u32 mmid)
 	return idr_find(&rhp->mmidr, mmid);
 }
 
-static inline int insert_handle(struct c4iw_dev *rhp, struct idr *idr,
-				void *handle, u32 id)
+static inline int _insert_handle(struct c4iw_dev *rhp, struct idr *idr,
+				 void *handle, u32 id, int lock)
 {
 	int ret;
 	int newid;
@@ -237,15 +248,29 @@  static inline int insert_handle(struct c4iw_dev *rhp, struct idr *idr,
 	do {
 		if (!idr_pre_get(idr, GFP_KERNEL))
 			return -ENOMEM;
-		spin_lock_irq(&rhp->lock);
+		if (lock)
+			spin_lock_irq(&rhp->lock);
 		ret = idr_get_new_above(idr, handle, id, &newid);
 		BUG_ON(newid != id);
-		spin_unlock_irq(&rhp->lock);
+		if (lock)
+			spin_unlock_irq(&rhp->lock);
 	} while (ret == -EAGAIN);
 
 	return ret;
 }
 
+static inline int insert_handle(struct c4iw_dev *rhp, struct idr *idr,
+				void *handle, u32 id)
+{
+	return _insert_handle(rhp, idr, handle, id, 1);
+}
+
+static inline int insert_handle_nolock(struct c4iw_dev *rhp, struct idr *idr,
+				       void *handle, u32 id)
+{
+	return _insert_handle(rhp, idr, handle, id, 0);
+}
+
 static inline void remove_handle(struct c4iw_dev *rhp, struct idr *idr, u32 id)
 {
 	spin_lock_irq(&rhp->lock);
@@ -369,6 +394,8 @@  struct c4iw_qp_attributes {
 	struct c4iw_ep *llp_stream_handle;
 	u8 layer_etype;
 	u8 ecode;
+	u16 sq_db_inc;
+	u16 rq_db_inc;
 };
 
 struct c4iw_qp {
@@ -443,6 +470,8 @@  static inline void insert_mmap(struct c4iw_ucontext *ucontext,
 
 enum c4iw_qp_attr_mask {
 	C4IW_QP_ATTR_NEXT_STATE = 1 << 0,
+	C4IW_QP_ATTR_SQ_DB = 1<<1,
+	C4IW_QP_ATTR_RQ_DB = 1<<2,
 	C4IW_QP_ATTR_ENABLE_RDMA_READ = 1 << 7,
 	C4IW_QP_ATTR_ENABLE_RDMA_WRITE = 1 << 8,
 	C4IW_QP_ATTR_ENABLE_RDMA_BIND = 1 << 9,
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index 74df98e..36fc94d 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -34,6 +34,10 @@ 
 
 #include "iw_cxgb4.h"
 
+static int db_delay_usecs = 1;
+module_param(db_delay_usecs, int, 0644);
+MODULE_PARM_DESC(db_delay_usecs, "Usecs to delay awaiting db fifo to drain");
+
 static int ocqp_support = 1;
 module_param(ocqp_support, int, 0644);
 MODULE_PARM_DESC(ocqp_support, "Support on-chip SQs (default=1)");
@@ -1117,6 +1121,29 @@  out:
 	return ret;
 }
 
+/*
+ * Called by the library when the qp has user dbs disabled due to
+ * a DB_FULL condition.  This function will single-thread all user
+ * DB rings to avoid overflowing the hw db-fifo.
+ */
+static int ring_kernel_db(struct c4iw_qp *qhp, u32 qid, u16 inc)
+{
+	int delay = db_delay_usecs;
+
+	mutex_lock(&qhp->rhp->db_mutex);
+	do {
+		if (cxgb4_dbfifo_count(qhp->rhp->rdev.lldi.ports[0], 1) < 768) {
+			writel(V_QID(qid) | V_PIDX(inc), qhp->wq.db);
+			break;
+		}
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		schedule_timeout(usecs_to_jiffies(delay));
+		delay = min(delay << 1, 200000);
+	} while (1);
+	mutex_unlock(&qhp->rhp->db_mutex);
+	return 0;
+}
+
 int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
 		   enum c4iw_qp_attr_mask mask,
 		   struct c4iw_qp_attributes *attrs,
@@ -1165,6 +1192,15 @@  int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
 		qhp->attr = newattr;
 	}
 
+	if (mask & C4IW_QP_ATTR_SQ_DB) {
+		ret = ring_kernel_db(qhp, qhp->wq.sq.qid, attrs->sq_db_inc);
+		goto out;
+	}
+	if (mask & C4IW_QP_ATTR_RQ_DB) {
+		ret = ring_kernel_db(qhp, qhp->wq.rq.qid, attrs->rq_db_inc);
+		goto out;
+	}
+
 	if (!(mask & C4IW_QP_ATTR_NEXT_STATE))
 		goto out;
 	if (qhp->attr.state == attrs->next_state)
@@ -1454,7 +1490,11 @@  struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
 	init_waitqueue_head(&qhp->wait);
 	atomic_set(&qhp->refcnt, 1);
 
-	ret = insert_handle(rhp, &rhp->qpidr, qhp, qhp->wq.sq.qid);
+	spin_lock_irq(&rhp->lock);
+	if (rhp->db_state != NORMAL)
+		t4_disable_wq_db(&qhp->wq);
+	ret = insert_handle_nolock(rhp, &rhp->qpidr, qhp, qhp->wq.sq.qid);
+	spin_unlock_irq(&rhp->lock);
 	if (ret)
 		goto err2;
 
@@ -1598,6 +1638,15 @@  int c4iw_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 			 C4IW_QP_ATTR_ENABLE_RDMA_WRITE |
 			 C4IW_QP_ATTR_ENABLE_RDMA_BIND) : 0;
 
+	/*
+	 * Use SQ_PSN and RQ_PSN to pass in IDX_INC values for
+	 * ringing the queue db when we're in DB_FULL mode.
+	 */
+	attrs.sq_db_inc = attr->sq_psn;
+	attrs.rq_db_inc = attr->rq_psn;
+	mask |= (attr_mask & IB_QP_SQ_PSN) ? C4IW_QP_ATTR_SQ_DB : 0;
+	mask |= (attr_mask & IB_QP_RQ_PSN) ? C4IW_QP_ATTR_RQ_DB : 0;
+
 	return c4iw_modify_qp(rhp, qhp, mask, &attrs, 0);
 }
 
diff --git a/drivers/infiniband/hw/cxgb4/user.h b/drivers/infiniband/hw/cxgb4/user.h
index e6669d5..32b754c 100644
--- a/drivers/infiniband/hw/cxgb4/user.h
+++ b/drivers/infiniband/hw/cxgb4/user.h
@@ -32,7 +32,7 @@ 
 #ifndef __C4IW_USER_H__
 #define __C4IW_USER_H__
 
-#define C4IW_UVERBS_ABI_VERSION	1
+#define C4IW_UVERBS_ABI_VERSION	2
 
 /*
  * Make sure that all structs defined in this file remain laid out so