[net-next,01/13] net/mlx5: Hold pages RB tree per VF

Message ID 20200728094411.116386-2-saeedm@mellanox.com
State Accepted
Delegated to: David Miller
Series [net-next,01/13] net/mlx5: Hold pages RB tree per VF

Commit Message

Saeed Mahameed July 28, 2020, 9:43 a.m. UTC
From: Eran Ben Elisha <eranbe@mellanox.com>

Per page request event, FW requests to allocate or release pages for a
single function. The driver maintains an FW pages object per function, so
there is no need to hold one global page database. Instead, hold a page
database per function, which improves the performance of the release flow
in all cases, especially for "release all pages".

As the range of function IDs is large and not sequential, use an xarray
to store a page database per function ID, with the function ID as the key.

Upon the first page allocation for a function ID, create that function's
page database. This database is released only at pagealloc mechanism
cleanup.
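
For reference, the lookup-or-create pattern at the core of this change
reduces to the sketch below. It is simplified from page_root_per_func_id()
in the patch; get_or_create_root() and destroy_all_roots() are illustrative
stand-in names, not the exact driver code:

	/* Lazily create the per-function RB tree root, keyed by func_id. */
	static struct rb_root *get_or_create_root(struct xarray *xa,
						  unsigned long func_id)
	{
		struct rb_root *root = xa_load(xa, func_id); /* NULL if absent */
		int err;

		if (root)
			return root;

		root = kzalloc(sizeof(*root), GFP_KERNEL);
		if (!root)
			return ERR_PTR(-ENOMEM);

		/* xa_insert() fails with -EBUSY if another path raced us in */
		err = xa_insert(xa, func_id, root, GFP_KERNEL);
		if (err) {
			kfree(root);
			return ERR_PTR(err);
		}
		*root = RB_ROOT;
		return root;
	}

	/* At cleanup, xa_for_each() visits only the IDs actually in use. */
	static void destroy_all_roots(struct xarray *xa)
	{
		struct rb_root *root;
		unsigned long id;

		xa_for_each(xa, id, root) {
			xa_erase(xa, id);
			kfree(root);
		}
		xa_destroy(xa);
	}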

NIC: ConnectX-4 Lx
CPU: Intel(R) Xeon(R) CPU E5-2650 v2 @ 2.60GHz
Test case: 32 VFs, measure release pages on one VF as part of FLR
Before: 0.021 Sec
After:  0.014 Sec

The improvement depends on the number of VFs and their memory
utilization. The time measurements above were taken on an idle system.

Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Reviewed-by: Mark Bloch <markb@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../ethernet/mellanox/mlx5/core/pagealloc.c   | 142 +++++++++++++-----
 include/linux/mlx5/driver.h                   |   2 +-
 2 files changed, 105 insertions(+), 39 deletions(-)

Patch

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
index 5ddd18639a1ee..1b20e3397ddef 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
@@ -35,6 +35,7 @@ 
 #include <linux/module.h>
 #include <linux/delay.h>
 #include <linux/mlx5/driver.h>
+#include <linux/xarray.h>
 #include "mlx5_core.h"
 #include "lib/eq.h"
 
@@ -73,15 +74,45 @@  enum {
 	MLX5_NUM_4K_IN_PAGE		= PAGE_SIZE / MLX5_ADAPTER_PAGE_SIZE,
 };
 
+static struct rb_root *page_root_per_func_id(struct mlx5_core_dev *dev, u16 func_id)
+{
+	struct rb_root *root;
+	int err;
+
+	root = xa_load(&dev->priv.page_root_xa, func_id);
+	if (root)
+		return root;
+
+	root = kzalloc(sizeof(*root), GFP_KERNEL);
+	if (!root)
+		return ERR_PTR(-ENOMEM);
+
+	err = xa_insert(&dev->priv.page_root_xa, func_id, root, GFP_KERNEL);
+	if (err) {
+		kfree(root);
+		return ERR_PTR(err);
+	}
+
+	*root = RB_ROOT;
+
+	return root;
+}
+
 static int insert_page(struct mlx5_core_dev *dev, u64 addr, struct page *page, u16 func_id)
 {
-	struct rb_root *root = &dev->priv.page_root;
-	struct rb_node **new = &root->rb_node;
 	struct rb_node *parent = NULL;
+	struct rb_root *root;
+	struct rb_node **new;
 	struct fw_page *nfp;
 	struct fw_page *tfp;
 	int i;
 
+	root = page_root_per_func_id(dev, func_id);
+	if (IS_ERR(root))
+		return PTR_ERR(root);
+
+	new = &root->rb_node;
+
 	while (*new) {
 		parent = *new;
 		tfp = rb_entry(parent, struct fw_page, rb_node);
@@ -111,13 +142,20 @@  static int insert_page(struct mlx5_core_dev *dev, u64 addr, struct page *page, u
 	return 0;
 }
 
-static struct fw_page *find_fw_page(struct mlx5_core_dev *dev, u64 addr)
+static struct fw_page *find_fw_page(struct mlx5_core_dev *dev, u64 addr,
+				    u32 func_id)
 {
-	struct rb_root *root = &dev->priv.page_root;
-	struct rb_node *tmp = root->rb_node;
 	struct fw_page *result = NULL;
+	struct rb_root *root;
+	struct rb_node *tmp;
 	struct fw_page *tfp;
 
+	root = xa_load(&dev->priv.page_root_xa, func_id);
+	if (WARN_ON_ONCE(!root))
+		return NULL;
+
+	tmp = root->rb_node;
+
 	while (tmp) {
 		tfp = rb_entry(tmp, struct fw_page, rb_node);
 		if (tfp->addr < addr) {
@@ -191,7 +229,13 @@  static int alloc_4k(struct mlx5_core_dev *dev, u64 *addr, u16 func_id)
 static void free_fwp(struct mlx5_core_dev *dev, struct fw_page *fwp,
 		     bool in_free_list)
 {
-	rb_erase(&fwp->rb_node, &dev->priv.page_root);
+	struct rb_root *root;
+
+	root = xa_load(&dev->priv.page_root_xa, fwp->func_id);
+	if (WARN_ON_ONCE(!root))
+		return;
+
+	rb_erase(&fwp->rb_node, root);
 	if (in_free_list)
 		list_del(&fwp->list);
 	dma_unmap_page(dev->device, fwp->addr & MLX5_U64_4K_PAGE_MASK,
@@ -200,12 +244,12 @@  static void free_fwp(struct mlx5_core_dev *dev, struct fw_page *fwp,
 	kfree(fwp);
 }
 
-static void free_4k(struct mlx5_core_dev *dev, u64 addr)
+static void free_4k(struct mlx5_core_dev *dev, u64 addr, u32 func_id)
 {
 	struct fw_page *fwp;
 	int n;
 
-	fwp = find_fw_page(dev, addr & MLX5_U64_4K_PAGE_MASK);
+	fwp = find_fw_page(dev, addr & MLX5_U64_4K_PAGE_MASK, func_id);
 	if (!fwp) {
 		mlx5_core_warn_rl(dev, "page not found\n");
 		return;
@@ -340,7 +384,7 @@  static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
 
 out_4k:
 	for (i--; i >= 0; i--)
-		free_4k(dev, MLX5_GET64(manage_pages_in, in, pas[i]));
+		free_4k(dev, MLX5_GET64(manage_pages_in, in, pas[i]), func_id);
 out_free:
 	kvfree(in);
 	if (notify_fail)
@@ -351,16 +395,19 @@  static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
 static void release_all_pages(struct mlx5_core_dev *dev, u32 func_id,
 			      bool ec_function)
 {
+	struct rb_root *root;
 	struct rb_node *p;
 	int npages = 0;
 
-	p = rb_first(&dev->priv.page_root);
+	root = xa_load(&dev->priv.page_root_xa, func_id);
+	if (WARN_ON_ONCE(!root))
+		return;
+
+	p = rb_first(root);
 	while (p) {
 		struct fw_page *fwp = rb_entry(p, struct fw_page, rb_node);
 
 		p = rb_next(p);
-		if (fwp->func_id != func_id)
-			continue;
 		npages += (MLX5_NUM_4K_IN_PAGE - fwp->free_count);
 		free_fwp(dev, fwp, fwp->free_count);
 	}
@@ -378,6 +425,7 @@  static void release_all_pages(struct mlx5_core_dev *dev, u32 func_id,
 static int reclaim_pages_cmd(struct mlx5_core_dev *dev,
 			     u32 *in, int in_size, u32 *out, int out_size)
 {
+	struct rb_root *root;
 	struct fw_page *fwp;
 	struct rb_node *p;
 	u32 func_id;
@@ -391,12 +439,14 @@  static int reclaim_pages_cmd(struct mlx5_core_dev *dev,
 	npages = MLX5_GET(manage_pages_in, in, input_num_entries);
 	func_id = MLX5_GET(manage_pages_in, in, function_id);
 
-	p = rb_first(&dev->priv.page_root);
+	root = xa_load(&dev->priv.page_root_xa, func_id);
+	if (WARN_ON_ONCE(!root))
+		return -EEXIST;
+
+	p = rb_first(root);
 	while (p && i < npages) {
 		fwp = rb_entry(p, struct fw_page, rb_node);
 		p = rb_next(p);
-		if (fwp->func_id != func_id)
-			continue;
 
 		MLX5_ARRAY_SET64(manage_pages_out, out, pas, i, fwp->addr);
 		i++;
@@ -446,7 +496,7 @@  static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
 	}
 
 	for (i = 0; i < num_claimed; i++)
-		free_4k(dev, MLX5_GET64(manage_pages_out, out, pas[i]));
+		free_4k(dev, MLX5_GET64(manage_pages_out, out, pas[i]), func_id);
 
 	if (nclaimed)
 		*nclaimed = num_claimed;
@@ -560,35 +610,49 @@  static int optimal_reclaimed_pages(void)
 	return ret;
 }
 
-int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev)
+static int mlx5_reclaim_root_pages(struct mlx5_core_dev *dev,
+				   struct rb_root *root, u16 func_id)
 {
 	unsigned long end = jiffies + msecs_to_jiffies(MAX_RECLAIM_TIME_MSECS);
-	struct fw_page *fwp;
-	struct rb_node *p;
-	int nclaimed = 0;
-	int err = 0;
 
-	do {
-		p = rb_first(&dev->priv.page_root);
-		if (p) {
-			fwp = rb_entry(p, struct fw_page, rb_node);
-			err = reclaim_pages(dev, fwp->func_id,
-					    optimal_reclaimed_pages(),
-					    &nclaimed, mlx5_core_is_ecpf(dev));
-
-			if (err) {
-				mlx5_core_warn(dev, "failed reclaiming pages (%d)\n",
-					       err);
-				return err;
-			}
-			if (nclaimed)
-				end = jiffies + msecs_to_jiffies(MAX_RECLAIM_TIME_MSECS);
+	while (!RB_EMPTY_ROOT(root)) {
+		int nclaimed;
+		int err;
+
+		err = reclaim_pages(dev, func_id, optimal_reclaimed_pages(),
+				    &nclaimed, mlx5_core_is_ecpf(dev));
+		if (err) {
+			mlx5_core_warn(dev, "failed reclaiming pages (%d) for func id 0x%x\n",
+				       err, func_id);
+			return err;
 		}
+
+		if (nclaimed)
+			end = jiffies + msecs_to_jiffies(MAX_RECLAIM_TIME_MSECS);
+
 		if (time_after(jiffies, end)) {
 			mlx5_core_warn(dev, "FW did not return all pages. giving up...\n");
 			break;
 		}
-	} while (p);
+	}
+
+	return 0;
+}
+
+int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev)
+{
+	struct rb_root *root;
+	unsigned long id;
+	void *entry;
+
+	xa_for_each(&dev->priv.page_root_xa, id, entry) {
+		root = entry;
+		mlx5_reclaim_root_pages(dev, root, id);
+		xa_erase(&dev->priv.page_root_xa, id);
+		kfree(root);
+	}
+
+	WARN_ON(!xa_empty(&dev->priv.page_root_xa));
 
 	WARN(dev->priv.fw_pages,
 	     "FW pages counter is %d after reclaiming all pages\n",
@@ -605,17 +669,19 @@  int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev)
 
 int mlx5_pagealloc_init(struct mlx5_core_dev *dev)
 {
-	dev->priv.page_root = RB_ROOT;
 	INIT_LIST_HEAD(&dev->priv.free_list);
 	dev->priv.pg_wq = create_singlethread_workqueue("mlx5_page_allocator");
 	if (!dev->priv.pg_wq)
 		return -ENOMEM;
 
+	xa_init(&dev->priv.page_root_xa);
+
 	return 0;
 }
 
 void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev)
 {
+	xa_destroy(&dev->priv.page_root_xa);
 	destroy_workqueue(dev->priv.pg_wq);
 }
 
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 6a97ad601991e..a0fcc4d13e93a 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -541,7 +541,7 @@  struct mlx5_priv {
 	/* pages stuff */
 	struct mlx5_nb          pg_nb;
 	struct workqueue_struct *pg_wq;
-	struct rb_root		page_root;
+	struct xarray           page_root_xa;
 	int			fw_pages;
 	atomic_t		reg_pages;
 	struct list_head	free_list;