Message ID | 152275372705.1026.11963048035925660701.stgit@firesoul |
---|---|
State | Deferred, archived |
Delegated to: | David Miller |
Headers | show |
Series | XDP redirect memory return API | expand |
On 03/04/2018 2:08 PM, Jesper Dangaard Brouer wrote: > This patch shows how it is possible to have both the driver local page > cache, which uses elevated refcnt for "catching"/avoiding SKB > put_page returns the page through the page allocator. And at the > same time, have pages getting returned to the page_pool from > ndo_xdp_xmit DMA completion. > > The performance improvement for XDP_REDIRECT in this patch is really > good. Especially considering that (currently) the xdp_return_frame > API and page_pool_put_page() do per-frame operations of both > rhashtable ID-lookup and locked return into (page_pool) ptr_ring. > (It is the plan to remove these per-frame operations in a followup > patchset). > > The benchmark performed was RX on mlx5 and XDP_REDIRECT out ixgbe, > with xdp_redirect_map (using devmap). And the target/maximum > capability of ixgbe is 13Mpps (on this HW setup). > > Before this patch for mlx5, XDP redirected frames were returned via > the page allocator. The single flow performance was 6Mpps, and if I > started two flows the collective performance dropped to 4Mpps, because we > hit the page allocator lock (further negative scaling occurs). > > Two test scenarios need to be covered, for the xdp_return_frame API, which > is DMA-TX completion running on same-CPU or cross-CPU free/return. > Results were same-CPU=10Mpps, and cross-CPU=12Mpps. This is very > close to our 13Mpps max target. > > The reason the max target isn't reached in the cross-CPU test is likely due > to RX-ring DMA unmap/map overhead (which doesn't occur in ixgbe to > ixgbe testing). It is also planned to remove this unnecessary DMA > unmap in a later patchset. > > V2: Adjustments requested by Tariq > - Changed page_pool_create return codes to not return NULL, only > ERR_PTR, as this simplifies err handling in drivers. 
> - Save a branch in mlx5e_page_release > - Correct page_pool size calc for MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ > > V5: Updated patch desc > > V8: Adjust for b0cedc844c00 ("net/mlx5e: Remove rq_headroom field from params") > V9: > - Adjust for 121e89275471 ("net/mlx5e: Refactor RQ XDP_TX indication") > - Adjust for 73281b78a37a ("net/mlx5e: Derive Striding RQ size from MTU") > - Correct handling if page_pool_create fail for MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ > > Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com> > Reviewed-by: Tariq Toukan <tariqt@mellanox.com> > Acked-by: Saeed Mahameed <saeedm@mellanox.com> > --- > drivers/net/ethernet/mellanox/mlx5/core/en.h | 3 ++ > drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 41 +++++++++++++++++---- > drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 16 ++++++-- > 3 files changed, 48 insertions(+), 12 deletions(-) > > diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h > index 1a05d1072c5e..3317a4da87cb 100644 > --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h > +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h > @@ -53,6 +53,8 @@ > #include "mlx5_core.h" > #include "en_stats.h" > > +struct page_pool; > + > #define MLX5_SET_CFG(p, f, v) MLX5_SET(create_flow_group_in, p, f, v) > > #define MLX5E_ETH_HARD_MTU (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN) > @@ -534,6 +536,7 @@ struct mlx5e_rq { > unsigned int hw_mtu; > struct mlx5e_xdpsq xdpsq; > DECLARE_BITMAP(flags, 8); > + struct page_pool *page_pool; > > /* control */ > struct mlx5_wq_ctrl wq_ctrl; > diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c > index 13c1e61258a7..d0f2cd86ef32 100644 > --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c > +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c > @@ -35,6 +35,7 @@ > #include <linux/mlx5/fs.h> > #include <net/vxlan.h> > #include <linux/bpf.h> > +#include <net/page_pool.h> > #include 
"eswitch.h" > #include "en.h" > #include "en_tc.h" > @@ -389,10 +390,11 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, > struct mlx5e_rq_param *rqp, > struct mlx5e_rq *rq) > { > + struct page_pool_params pp_params = { 0 }; > struct mlx5_core_dev *mdev = c->mdev; > void *rqc = rqp->rqc; > void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq); > - u32 byte_count; > + u32 byte_count, pool_size; > int npages; > int wq_sz; > int err; > @@ -432,9 +434,12 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, > > rq->buff.map_dir = rq->xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE; > rq->buff.headroom = mlx5e_get_rq_headroom(mdev, params); > + pool_size = 1 << params->log_rq_mtu_frames; > > switch (rq->wq_type) { > case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: > + > + pool_size = pool_size * MLX5_MPWRQ_PAGES_PER_WQE; For rq->wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ, please use: pool_size = 1 << params->log_rq_mtu_frames; For rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ, please use: pool_size = MLX5_MPWRQ_PAGES_PER_WQ * mlx5e_mpwqe_get_log_rq_size(params); Thanks, Tariq
On Wed, 4 Apr 2018 16:12:14 +0300 Tariq Toukan <tariqt@mellanox.com> wrote: > > @@ -432,9 +434,12 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, > > > > rq->buff.map_dir = rq->xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE; > > rq->buff.headroom = mlx5e_get_rq_headroom(mdev, params); > > + pool_size = 1 << params->log_rq_mtu_frames; > > > > switch (rq->wq_type) { > > case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: > > + > > + pool_size = pool_size * MLX5_MPWRQ_PAGES_PER_WQE; > > For rq->wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ, please use: > pool_size = 1 << params->log_rq_mtu_frames; > > For rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ, please use: > pool_size = MLX5_MPWRQ_PAGES_PER_WQ * mlx5e_mpwqe_get_log_rq_size(params); Okay, fixed. Ready for V10, when net-next opens again...
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 1a05d1072c5e..3317a4da87cb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -53,6 +53,8 @@ #include "mlx5_core.h" #include "en_stats.h" +struct page_pool; + #define MLX5_SET_CFG(p, f, v) MLX5_SET(create_flow_group_in, p, f, v) #define MLX5E_ETH_HARD_MTU (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN) @@ -534,6 +536,7 @@ struct mlx5e_rq { unsigned int hw_mtu; struct mlx5e_xdpsq xdpsq; DECLARE_BITMAP(flags, 8); + struct page_pool *page_pool; /* control */ struct mlx5_wq_ctrl wq_ctrl; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 13c1e61258a7..d0f2cd86ef32 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -35,6 +35,7 @@ #include <linux/mlx5/fs.h> #include <net/vxlan.h> #include <linux/bpf.h> +#include <net/page_pool.h> #include "eswitch.h" #include "en.h" #include "en_tc.h" @@ -389,10 +390,11 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, struct mlx5e_rq_param *rqp, struct mlx5e_rq *rq) { + struct page_pool_params pp_params = { 0 }; struct mlx5_core_dev *mdev = c->mdev; void *rqc = rqp->rqc; void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq); - u32 byte_count; + u32 byte_count, pool_size; int npages; int wq_sz; int err; @@ -432,9 +434,12 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, rq->buff.map_dir = rq->xdp_prog ? 
DMA_BIDIRECTIONAL : DMA_FROM_DEVICE; rq->buff.headroom = mlx5e_get_rq_headroom(mdev, params); + pool_size = 1 << params->log_rq_mtu_frames; switch (rq->wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + + pool_size = pool_size * MLX5_MPWRQ_PAGES_PER_WQE; rq->post_wqes = mlx5e_post_rx_mpwqes; rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe; @@ -512,13 +517,31 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, rq->mkey_be = c->mkey_be; } - /* This must only be activate for order-0 pages */ - if (rq->xdp_prog) { - err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq, - MEM_TYPE_PAGE_ORDER0, NULL); - if (err) - goto err_rq_wq_destroy; + /* Create a page_pool and register it with rxq */ + pp_params.order = rq->buff.page_order; + pp_params.flags = 0; /* No-internal DMA mapping in page_pool */ + pp_params.pool_size = pool_size; + pp_params.nid = cpu_to_node(c->cpu); + pp_params.dev = c->pdev; + pp_params.dma_dir = rq->buff.map_dir; + + /* page_pool can be used even when there is no rq->xdp_prog, + * given page_pool does not handle DMA mapping there is no + * required state to clear. And page_pool gracefully handle + * elevated refcnt. 
+ */ + rq->page_pool = page_pool_create(&pp_params); + if (IS_ERR(rq->page_pool)) { + if (rq->wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) + kfree(rq->wqe.frag_info); + err = PTR_ERR(rq->page_pool); + rq->page_pool = NULL; + goto err_rq_wq_destroy; } + err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq, + MEM_TYPE_PAGE_POOL, rq->page_pool); + if (err) + goto err_rq_wq_destroy; for (i = 0; i < wq_sz; i++) { struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, i); @@ -556,6 +579,8 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, if (rq->xdp_prog) bpf_prog_put(rq->xdp_prog); xdp_rxq_info_unreg(&rq->xdp_rxq); + if (rq->page_pool) + page_pool_destroy(rq->page_pool); mlx5_wq_destroy(&rq->wq_ctrl); return err; @@ -569,6 +594,8 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq) bpf_prog_put(rq->xdp_prog); xdp_rxq_info_unreg(&rq->xdp_rxq); + if (rq->page_pool) + page_pool_destroy(rq->page_pool); switch (rq->wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 0e24be05907f..f42436d7f2d9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -37,6 +37,7 @@ #include <linux/bpf_trace.h> #include <net/busy_poll.h> #include <net/ip6_checksum.h> +#include <net/page_pool.h> #include "en.h" #include "en_tc.h" #include "eswitch.h" @@ -221,7 +222,7 @@ static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq, if (mlx5e_rx_cache_get(rq, dma_info)) return 0; - dma_info->page = dev_alloc_pages(rq->buff.page_order); + dma_info->page = page_pool_dev_alloc_pages(rq->page_pool); if (unlikely(!dma_info->page)) return -ENOMEM; @@ -246,11 +247,16 @@ static void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info, bool recycle) { - if (likely(recycle) && mlx5e_rx_cache_put(rq, dma_info)) - return; + if (likely(recycle)) { + if 
(mlx5e_rx_cache_put(rq, dma_info)) + return; - mlx5e_page_dma_unmap(rq, dma_info); - put_page(dma_info->page); + mlx5e_page_dma_unmap(rq, dma_info); + page_pool_recycle_direct(rq->page_pool, dma_info->page); + } else { + mlx5e_page_dma_unmap(rq, dma_info); + put_page(dma_info->page); + } } static inline bool mlx5e_page_reuse(struct mlx5e_rq *rq,