diff mbox

[v1,net] net/mlx4: Avoid wrong virtual mappings

Message ID 1461740820-15386-1-git-send-email-hagaya@mellanox.com
State Changes Requested, archived
Delegated to: David Miller
Headers show

Commit Message

Haggai Abramovsky April 27, 2016, 7:07 a.m. UTC
The dma_alloc_coherent() function returns a virtual address which can
be used for coherent access to the underlying memory.  On some
architectures, like arm64, undefined behavior results if this memory is
also accessed via virtual mappings that are not coherent.  Because of
their undefined nature, operations like virt_to_page() return garbage
when passed virtual addresses obtained from dma_alloc_coherent().  Any
subsequent mappings via vmap() of the garbage page values are unusable
and result in bad things like bus errors (synchronous aborts in ARM64
speak).

The mlx4 driver contains code that does the equivalent of:
vmap(virt_to_page(dma_alloc_coherent)), this results in an OOPs when the
device is opened.

Prevent the Ethernet driver from running this problematic code by
forcing it to allocate contiguous memory. As for the InfiniBand driver,
we first try to allocate contiguous memory, but in case of failure we
fall back to working with fragmented memory.

Signed-off-by: Haggai Abramovsky <hagaya@mellanox.com>
Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Reported-by: David Daney <david.daney@cavium.com>
Tested-by: Sinan Kaya <okaya@codeaurora.org>
---
Changes from v0:
	Fix indentation error raised by LeonR

 drivers/infiniband/hw/mlx4/qp.c                   | 26 +++++--
 drivers/net/ethernet/mellanox/mlx4/alloc.c        | 93 ++++++++++-------------
 drivers/net/ethernet/mellanox/mlx4/en_cq.c        |  9 +--
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c    |  2 +-
 drivers/net/ethernet/mellanox/mlx4/en_resources.c | 31 --------
 drivers/net/ethernet/mellanox/mlx4/en_rx.c        | 11 +--
 drivers/net/ethernet/mellanox/mlx4/en_tx.c        | 14 +---
 drivers/net/ethernet/mellanox/mlx4/mlx4_en.h      |  2 -
 include/linux/mlx4/device.h                       |  4 +-
 9 files changed, 67 insertions(+), 125 deletions(-)

Comments

Sinan Kaya May 1, 2016, 3:43 p.m. UTC | #1
On 4/27/2016 3:07 AM, Haggai Abramovsky wrote:
> The dma_alloc_coherent() function returns a virtual address which can
> be used for coherent access to the underlying memory.  On some
> architectures, like arm64, undefined behavior results if this memory is
> also accessed via virtual mappings that are not coherent.  Because of
> their undefined nature, operations like virt_to_page() return garbage
> when passed virtual addresses obtained from dma_alloc_coherent().  Any
> subsequent mappings via vmap() of the garbage page values are unusable
> and result in bad things like bus errors (synchronous aborts in ARM64
> speak).
> 
> The mlx4 driver contains code that does the equivalent of:
> vmap(virt_to_page(dma_alloc_coherent)), this results in an OOPs when the
> device is opened.
> 
> Prevent Ethernet driver to run this problematic code by forcing it to
> allocate contiguous memory. As for the Infiniband driver, at first we
> are trying to allocate contiguous memory, but in case of failure roll
> back to work with fragmented memory.
> 
> Signed-off-by: Haggai Abramovsky <hagaya@mellanox.com>
> Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
> Reported-by: David Daney <david.daney@cavium.com>
> Tested-by: Sinan Kaya <okaya@codeaurora.org>
> ---
> Changes from v0:
> 	Fix indentation error raised by LeonR
> 
>  drivers/infiniband/hw/mlx4/qp.c                   | 26 +++++--
>  drivers/net/ethernet/mellanox/mlx4/alloc.c        | 93 ++++++++++-------------
>  drivers/net/ethernet/mellanox/mlx4/en_cq.c        |  9 +--
>  drivers/net/ethernet/mellanox/mlx4/en_netdev.c    |  2 +-
>  drivers/net/ethernet/mellanox/mlx4/en_resources.c | 31 --------
>  drivers/net/ethernet/mellanox/mlx4/en_rx.c        | 11 +--
>  drivers/net/ethernet/mellanox/mlx4/en_tx.c        | 14 +---
>  drivers/net/ethernet/mellanox/mlx4/mlx4_en.h      |  2 -
>  include/linux/mlx4/device.h                       |  4 +-
>  9 files changed, 67 insertions(+), 125 deletions(-)
> 
> diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
> index fd97534..842a6da 100644
> --- a/drivers/infiniband/hw/mlx4/qp.c
> +++ b/drivers/infiniband/hw/mlx4/qp.c
> @@ -419,7 +419,8 @@ static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
>  }
>  
>  static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
> -			      enum mlx4_ib_qp_type type, struct mlx4_ib_qp *qp)
> +			      enum mlx4_ib_qp_type type, struct mlx4_ib_qp *qp,
> +			      int shrink_wqe)
>  {
>  	int s;
>  
> @@ -477,7 +478,7 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
>  	 * We set WQE size to at least 64 bytes, this way stamping
>  	 * invalidates each WQE.
>  	 */
> -	if (dev->dev->caps.fw_ver >= MLX4_FW_VER_WQE_CTRL_NEC &&
> +	if (shrink_wqe && dev->dev->caps.fw_ver >= MLX4_FW_VER_WQE_CTRL_NEC &&
>  	    qp->sq_signal_bits && BITS_PER_LONG == 64 &&
>  	    type != MLX4_IB_QPT_SMI && type != MLX4_IB_QPT_GSI &&
>  	    !(type & (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_PROXY_SMI |
> @@ -642,6 +643,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
>  {
>  	int qpn;
>  	int err;
> +	struct ib_qp_cap backup_cap;
>  	struct mlx4_ib_sqp *sqp;
>  	struct mlx4_ib_qp *qp;
>  	enum mlx4_ib_qp_type qp_type = (enum mlx4_ib_qp_type) init_attr->qp_type;
> @@ -775,7 +777,8 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
>  				goto err;
>  		}
>  
> -		err = set_kernel_sq_size(dev, &init_attr->cap, qp_type, qp);
> +		memcpy(&backup_cap, &init_attr->cap, sizeof(backup_cap));
> +		err = set_kernel_sq_size(dev, &init_attr->cap, qp_type, qp, 1);
>  		if (err)
>  			goto err;
>  
> @@ -787,9 +790,20 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
>  			*qp->db.db = 0;
>  		}
>  
> -		if (mlx4_buf_alloc(dev->dev, qp->buf_size, PAGE_SIZE * 2, &qp->buf, gfp)) {
> -			err = -ENOMEM;
> -			goto err_db;
> +		if (mlx4_buf_alloc(dev->dev, qp->buf_size, qp->buf_size,
> +				   &qp->buf, gfp)) {
> +			memcpy(&init_attr->cap, &backup_cap,
> +			       sizeof(backup_cap));
> +			err = set_kernel_sq_size(dev, &init_attr->cap, qp_type,
> +						 qp, 0);
> +			if (err)
> +				goto err_db;
> +
> +			if (mlx4_buf_alloc(dev->dev, qp->buf_size,
> +					   PAGE_SIZE * 2, &qp->buf, gfp)) {
> +				err = -ENOMEM;
> +				goto err_db;
> +			}
>  		}
>  
>  		err = mlx4_mtt_init(dev->dev, qp->buf.npages, qp->buf.page_shift,
> diff --git a/drivers/net/ethernet/mellanox/mlx4/alloc.c b/drivers/net/ethernet/mellanox/mlx4/alloc.c
> index 0c51c69..249a458 100644
> --- a/drivers/net/ethernet/mellanox/mlx4/alloc.c
> +++ b/drivers/net/ethernet/mellanox/mlx4/alloc.c
> @@ -576,41 +576,48 @@ out:
>  
>  	return res;
>  }
> -/*
> - * Handling for queue buffers -- we allocate a bunch of memory and
> - * register it in a memory region at HCA virtual address 0.  If the
> - * requested size is > max_direct, we split the allocation into
> - * multiple pages, so we don't require too much contiguous memory.
> - */
>  
> -int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
> -		   struct mlx4_buf *buf, gfp_t gfp)
> +static int mlx4_buf_direct_alloc(struct mlx4_dev *dev, int size,
> +				 struct mlx4_buf *buf, gfp_t gfp)
>  {
>  	dma_addr_t t;
>  
> -	if (size <= max_direct) {
> -		buf->nbufs        = 1;
> -		buf->npages       = 1;
> -		buf->page_shift   = get_order(size) + PAGE_SHIFT;
> -		buf->direct.buf   = dma_alloc_coherent(&dev->persist->pdev->dev,
> -						       size, &t, gfp);
> -		if (!buf->direct.buf)
> -			return -ENOMEM;
> +	buf->nbufs        = 1;
> +	buf->npages       = 1;
> +	buf->page_shift   = get_order(size) + PAGE_SHIFT;
> +	buf->direct.buf   =
> +		dma_zalloc_coherent(&dev->persist->pdev->dev,
> +				    size, &t, gfp);
> +	if (!buf->direct.buf)
> +		return -ENOMEM;
>  
> -		buf->direct.map = t;
> +	buf->direct.map = t;
>  
> -		while (t & ((1 << buf->page_shift) - 1)) {
> -			--buf->page_shift;
> -			buf->npages *= 2;
> -		}
> +	while (t & ((1 << buf->page_shift) - 1)) {
> +		--buf->page_shift;
> +		buf->npages *= 2;
> +	}
>  
> -		memset(buf->direct.buf, 0, size);
> +	return 0;
> +}
> +
> +/* Handling for queue buffers -- we allocate a bunch of memory and
> + * register it in a memory region at HCA virtual address 0. If the
> + *  requested size is > max_direct, we split the allocation into
> + *  multiple pages, so we don't require too much contiguous memory.
> + */
> +int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
> +		   struct mlx4_buf *buf, gfp_t gfp)
> +{
> +	if (size <= max_direct) {
> +		return mlx4_buf_direct_alloc(dev, size, buf, gfp);
>  	} else {
> +		dma_addr_t t;
>  		int i;
>  
> -		buf->direct.buf  = NULL;
> -		buf->nbufs       = (size + PAGE_SIZE - 1) / PAGE_SIZE;
> -		buf->npages      = buf->nbufs;
> +		buf->direct.buf = NULL;
> +		buf->nbufs	= (size + PAGE_SIZE - 1) / PAGE_SIZE;
> +		buf->npages	= buf->nbufs;
>  		buf->page_shift  = PAGE_SHIFT;
>  		buf->page_list   = kcalloc(buf->nbufs, sizeof(*buf->page_list),
>  					   gfp);
> @@ -619,28 +626,12 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
>  
>  		for (i = 0; i < buf->nbufs; ++i) {
>  			buf->page_list[i].buf =
> -				dma_alloc_coherent(&dev->persist->pdev->dev,
> -						   PAGE_SIZE,
> -						   &t, gfp);
> +				dma_zalloc_coherent(&dev->persist->pdev->dev,
> +						    PAGE_SIZE, &t, gfp);
>  			if (!buf->page_list[i].buf)
>  				goto err_free;
>  
>  			buf->page_list[i].map = t;
> -
> -			memset(buf->page_list[i].buf, 0, PAGE_SIZE);
> -		}
> -
> -		if (BITS_PER_LONG == 64) {
> -			struct page **pages;
> -			pages = kmalloc(sizeof *pages * buf->nbufs, gfp);
> -			if (!pages)
> -				goto err_free;
> -			for (i = 0; i < buf->nbufs; ++i)
> -				pages[i] = virt_to_page(buf->page_list[i].buf);
> -			buf->direct.buf = vmap(pages, buf->nbufs, VM_MAP, PAGE_KERNEL);
> -			kfree(pages);
> -			if (!buf->direct.buf)
> -				goto err_free;
>  		}
>  	}
>  
> @@ -655,15 +646,11 @@ EXPORT_SYMBOL_GPL(mlx4_buf_alloc);
>  
>  void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf)
>  {
> -	int i;
> -
> -	if (buf->nbufs == 1)
> +	if (buf->nbufs == 1) {
>  		dma_free_coherent(&dev->persist->pdev->dev, size,
> -				  buf->direct.buf,
> -				  buf->direct.map);
> -	else {
> -		if (BITS_PER_LONG == 64)
> -			vunmap(buf->direct.buf);
> +				  buf->direct.buf, buf->direct.map);
> +	} else {
> +		int i;
>  
>  		for (i = 0; i < buf->nbufs; ++i)
>  			if (buf->page_list[i].buf)
> @@ -789,7 +776,7 @@ void mlx4_db_free(struct mlx4_dev *dev, struct mlx4_db *db)
>  EXPORT_SYMBOL_GPL(mlx4_db_free);
>  
>  int mlx4_alloc_hwq_res(struct mlx4_dev *dev, struct mlx4_hwq_resources *wqres,
> -		       int size, int max_direct)
> +		       int size)
>  {
>  	int err;
>  
> @@ -799,7 +786,7 @@ int mlx4_alloc_hwq_res(struct mlx4_dev *dev, struct mlx4_hwq_resources *wqres,
>  
>  	*wqres->db.db = 0;
>  
> -	err = mlx4_buf_alloc(dev, size, max_direct, &wqres->buf, GFP_KERNEL);
> +	err = mlx4_buf_direct_alloc(dev, size, &wqres->buf, GFP_KERNEL);
>  	if (err)
>  		goto err_db;
>  
> diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c
> index af975a2..132cea6 100644
> --- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c
> +++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c
> @@ -73,22 +73,16 @@ int mlx4_en_create_cq(struct mlx4_en_priv *priv,
>  	 */
>  	set_dev_node(&mdev->dev->persist->pdev->dev, node);
>  	err = mlx4_alloc_hwq_res(mdev->dev, &cq->wqres,
> -				cq->buf_size, 2 * PAGE_SIZE);
> +				cq->buf_size);
>  	set_dev_node(&mdev->dev->persist->pdev->dev, mdev->dev->numa_node);
>  	if (err)
>  		goto err_cq;
>  
> -	err = mlx4_en_map_buffer(&cq->wqres.buf);
> -	if (err)
> -		goto err_res;
> -
>  	cq->buf = (struct mlx4_cqe *)cq->wqres.buf.direct.buf;
>  	*pcq = cq;
>  
>  	return 0;
>  
> -err_res:
> -	mlx4_free_hwq_res(mdev->dev, &cq->wqres, cq->buf_size);
>  err_cq:
>  	kfree(cq);
>  	*pcq = NULL;
> @@ -177,7 +171,6 @@ void mlx4_en_destroy_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq **pcq)
>  	struct mlx4_en_dev *mdev = priv->mdev;
>  	struct mlx4_en_cq *cq = *pcq;
>  
> -	mlx4_en_unmap_buffer(&cq->wqres.buf);
>  	mlx4_free_hwq_res(mdev->dev, &cq->wqres, cq->buf_size);
>  	if (mlx4_is_eq_vector_valid(mdev->dev, priv->port, cq->vector) &&
>  	    cq->is_tx == RX)
> diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
> index b4b258c..5b19178 100644
> --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
> +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
> @@ -2907,7 +2907,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
>  
>  	/* Allocate page for receive rings */
>  	err = mlx4_alloc_hwq_res(mdev->dev, &priv->res,
> -				MLX4_EN_PAGE_SIZE, MLX4_EN_PAGE_SIZE);
> +				MLX4_EN_PAGE_SIZE);
>  	if (err) {
>  		en_err(priv, "Failed to allocate page for rx qps\n");
>  		goto out;
> diff --git a/drivers/net/ethernet/mellanox/mlx4/en_resources.c b/drivers/net/ethernet/mellanox/mlx4/en_resources.c
> index 02e925d..a6b0db0 100644
> --- a/drivers/net/ethernet/mellanox/mlx4/en_resources.c
> +++ b/drivers/net/ethernet/mellanox/mlx4/en_resources.c
> @@ -107,37 +107,6 @@ int mlx4_en_change_mcast_lb(struct mlx4_en_priv *priv, struct mlx4_qp *qp,
>  	return ret;
>  }
>  
> -int mlx4_en_map_buffer(struct mlx4_buf *buf)
> -{
> -	struct page **pages;
> -	int i;
> -
> -	if (BITS_PER_LONG == 64 || buf->nbufs == 1)
> -		return 0;
> -
> -	pages = kmalloc(sizeof *pages * buf->nbufs, GFP_KERNEL);
> -	if (!pages)
> -		return -ENOMEM;
> -
> -	for (i = 0; i < buf->nbufs; ++i)
> -		pages[i] = virt_to_page(buf->page_list[i].buf);
> -
> -	buf->direct.buf = vmap(pages, buf->nbufs, VM_MAP, PAGE_KERNEL);
> -	kfree(pages);
> -	if (!buf->direct.buf)
> -		return -ENOMEM;
> -
> -	return 0;
> -}
> -
> -void mlx4_en_unmap_buffer(struct mlx4_buf *buf)
> -{
> -	if (BITS_PER_LONG == 64 || buf->nbufs == 1)
> -		return;
> -
> -	vunmap(buf->direct.buf);
> -}
> -
>  void mlx4_en_sqp_event(struct mlx4_qp *qp, enum mlx4_event event)
>  {
>      return;
> diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
> index 7d25bc9..89775bb 100644
> --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
> +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
> @@ -394,17 +394,11 @@ int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
>  
>  	/* Allocate HW buffers on provided NUMA node */
>  	set_dev_node(&mdev->dev->persist->pdev->dev, node);
> -	err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres,
> -				 ring->buf_size, 2 * PAGE_SIZE);
> +	err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres, ring->buf_size);
>  	set_dev_node(&mdev->dev->persist->pdev->dev, mdev->dev->numa_node);
>  	if (err)
>  		goto err_info;
>  
> -	err = mlx4_en_map_buffer(&ring->wqres.buf);
> -	if (err) {
> -		en_err(priv, "Failed to map RX buffer\n");
> -		goto err_hwq;
> -	}
>  	ring->buf = ring->wqres.buf.direct.buf;
>  
>  	ring->hwtstamp_rx_filter = priv->hwtstamp_config.rx_filter;
> @@ -412,8 +406,6 @@ int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
>  	*pring = ring;
>  	return 0;
>  
> -err_hwq:
> -	mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size);
>  err_info:
>  	vfree(ring->rx_info);
>  	ring->rx_info = NULL;
> @@ -517,7 +509,6 @@ void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv,
>  	struct mlx4_en_dev *mdev = priv->mdev;
>  	struct mlx4_en_rx_ring *ring = *pring;
>  
> -	mlx4_en_unmap_buffer(&ring->wqres.buf);
>  	mlx4_free_hwq_res(mdev->dev, &ring->wqres, size * stride + TXBB_SIZE);
>  	vfree(ring->rx_info);
>  	ring->rx_info = NULL;
> diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
> index c0d7b72..b9ab646 100644
> --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
> +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
> @@ -93,20 +93,13 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
>  
>  	/* Allocate HW buffers on provided NUMA node */
>  	set_dev_node(&mdev->dev->persist->pdev->dev, node);
> -	err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres, ring->buf_size,
> -				 2 * PAGE_SIZE);
> +	err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres, ring->buf_size);
>  	set_dev_node(&mdev->dev->persist->pdev->dev, mdev->dev->numa_node);
>  	if (err) {
>  		en_err(priv, "Failed allocating hwq resources\n");
>  		goto err_bounce;
>  	}
>  
> -	err = mlx4_en_map_buffer(&ring->wqres.buf);
> -	if (err) {
> -		en_err(priv, "Failed to map TX buffer\n");
> -		goto err_hwq_res;
> -	}
> -
>  	ring->buf = ring->wqres.buf.direct.buf;
>  
>  	en_dbg(DRV, priv, "Allocated TX ring (addr:%p) - buf:%p size:%d buf_size:%d dma:%llx\n",
> @@ -117,7 +110,7 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
>  				    MLX4_RESERVE_ETH_BF_QP);
>  	if (err) {
>  		en_err(priv, "failed reserving qp for TX ring\n");
> -		goto err_map;
> +		goto err_hwq_res;
>  	}
>  
>  	err = mlx4_qp_alloc(mdev->dev, ring->qpn, &ring->qp, GFP_KERNEL);
> @@ -154,8 +147,6 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
>  
>  err_reserve:
>  	mlx4_qp_release_range(mdev->dev, ring->qpn, 1);
> -err_map:
> -	mlx4_en_unmap_buffer(&ring->wqres.buf);
>  err_hwq_res:
>  	mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size);
>  err_bounce:
> @@ -182,7 +173,6 @@ void mlx4_en_destroy_tx_ring(struct mlx4_en_priv *priv,
>  	mlx4_qp_remove(mdev->dev, &ring->qp);
>  	mlx4_qp_free(mdev->dev, &ring->qp);
>  	mlx4_qp_release_range(priv->mdev->dev, ring->qpn, 1);
> -	mlx4_en_unmap_buffer(&ring->wqres.buf);
>  	mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size);
>  	kfree(ring->bounce_buf);
>  	ring->bounce_buf = NULL;
> diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
> index d12ab6a..a70e2d0 100644
> --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
> +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
> @@ -671,8 +671,6 @@ void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride,
>  		int is_tx, int rss, int qpn, int cqn, int user_prio,
>  		struct mlx4_qp_context *context);
>  void mlx4_en_sqp_event(struct mlx4_qp *qp, enum mlx4_event event);
> -int mlx4_en_map_buffer(struct mlx4_buf *buf);
> -void mlx4_en_unmap_buffer(struct mlx4_buf *buf);
>  int mlx4_en_change_mcast_lb(struct mlx4_en_priv *priv, struct mlx4_qp *qp,
>  			    int loopback);
>  void mlx4_en_calc_rx_buf(struct net_device *dev);
> diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
> index 8541a91..72da65f 100644
> --- a/include/linux/mlx4/device.h
> +++ b/include/linux/mlx4/device.h
> @@ -1051,7 +1051,7 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
>  void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf);
>  static inline void *mlx4_buf_offset(struct mlx4_buf *buf, int offset)
>  {
> -	if (BITS_PER_LONG == 64 || buf->nbufs == 1)
> +	if (buf->nbufs == 1)
>  		return buf->direct.buf + offset;
>  	else
>  		return buf->page_list[offset >> PAGE_SHIFT].buf +
> @@ -1091,7 +1091,7 @@ int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order,
>  void mlx4_db_free(struct mlx4_dev *dev, struct mlx4_db *db);
>  
>  int mlx4_alloc_hwq_res(struct mlx4_dev *dev, struct mlx4_hwq_resources *wqres,
> -		       int size, int max_direct);
> +		       int size);
>  void mlx4_free_hwq_res(struct mlx4_dev *mdev, struct mlx4_hwq_resources *wqres,
>  		       int size);
>  
> 

Can we get this queued for 4.7? Mellanox with arm64 has been broken over 1.5 years.
David Miller May 1, 2016, 8:07 p.m. UTC | #2
From: Sinan Kaya <okaya@codeaurora.org>
Date: Sun, 1 May 2016 11:43:54 -0400

> Can we get this queued for 4.7? Mellanox with arm64 has been broken over 1.5 years.

Can you not quote an entire huge patch just to say something like
this?

I'm waiting for the people involved all to give feedback.

If it's been broken for more than a year, waiting for a few more
days to get the fix right is not going to be the end of the world.
Sinan Kaya May 1, 2016, 8:12 p.m. UTC | #3
On 5/1/2016 4:07 PM, David Miller wrote:
> From: Sinan Kaya <okaya@codeaurora.org>
> Date: Sun, 1 May 2016 11:43:54 -0400
> 
>> Can we get this queued for 4.7? Mellanox with arm64 has been broken over 1.5 years.
> 
> Can you not quote an entire huge patch just to say something like
> this?
> 
> I'm waiting for the people involved all to give feedback.
> 
> If it's been broken for more than a year, waiting for a few more
> days to get the fix right is not going to be the end of the world.
> 

Sure, I thought there was a common consensus. The original patch posted was from Christoph Hellwig.

This patch does the same thing like Christoph Hellwig proposed + a fallback approach for the
infiniband case where fragmented buffers are still supported.

The patch is actually a copy/paste from the MLX5 driver into MLX4.

I'm OK with waiting for a few more days. I just wanted to make sure that this patch
is moving in the right direction.

If not, let's list the open issues.
David Miller May 1, 2016, 11:13 p.m. UTC | #4
From: Sinan Kaya <okaya@codeaurora.org>
Date: Sun, 1 May 2016 16:12:13 -0400

> I just wanted to make sure that this patch is moving in the right
> direction.

You never need to wonder about this.

If the patch is in an active state in the patchwork queue, which this
one is, there is absolutely nothing you need to do nor ask about the
patch.
Or Gerlitz May 3, 2016, 7:45 a.m. UTC | #5
On Wed, Apr 27, 2016 at 10:07 AM, Haggai Abramovsky <hagaya@mellanox.com> wrote:
> The dma_alloc_coherent() function returns a virtual address which can
> be used for coherent access to the underlying memory.  On some
> architectures, like arm64, undefined behavior results if this memory is
> also accessed via virtual mappings that are not coherent.  Because of
> their undefined nature, operations like virt_to_page() return garbage
> when passed virtual addresses obtained from dma_alloc_coherent().  Any
> subsequent mappings via vmap() of the garbage page values are unusable
> and result in bad things like bus errors (synchronous aborts in ARM64
> speak).
>
> The mlx4 driver contains code that does the equivalent of:
> vmap(virt_to_page(dma_alloc_coherent)), this results in an OOPs when the
> device is opened.
>
> Prevent Ethernet driver to run this problematic code by forcing it to
> allocate contiguous memory. As for the Infiniband driver, at first we
> are trying to allocate contiguous memory, but in case of failure roll
> back to work with fragmented memory.

Dave,

The patch changes the driver to do single allocation for potentially
very large HW WQE
descriptor buffers such as those used by the RDMA (mlx5_ib) driver.
The IB driver does
have the means to cope with fragmented allocations, and under RDMA use
cases, QPs
are being frequently set not only on system startup, but rather
throughout all the lifecycles
(e.g every now and then in production systems). As of all the above,
we prefer the patch
to go to net-next and not net. This will make the code (1) correct,
and (2) we have the chance
to do the whatever investigations needed and if required add a follow
up fix for 4.7-rc

Or.

> Signed-off-by: Haggai Abramovsky <hagaya@mellanox.com>
> Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
> Reported-by: David Daney <david.daney@cavium.com>
> Tested-by: Sinan Kaya <okaya@codeaurora.org>
David Miller May 3, 2016, 4 p.m. UTC | #6
From: Or Gerlitz <gerlitz.or@gmail.com>
Date: Tue, 3 May 2016 10:45:43 +0300

> The patch changes the driver to do single allocation for potentially
> very large HW WQE descriptor buffers such as those used by the RDMA
> (mlx5_ib) driver.

I know exactly what this patch does and how, but thanks for trying to
teach me what it does anyways.
David Miller May 3, 2016, 4:41 p.m. UTC | #7
From: Haggai Abramovsky <hagaya@mellanox.com>
Date: Wed, 27 Apr 2016 10:07:00 +0300

> @@ -419,7 +419,8 @@ static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
>  }
>  
>  static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
> -			      enum mlx4_ib_qp_type type, struct mlx4_ib_qp *qp)
> +			      enum mlx4_ib_qp_type type, struct mlx4_ib_qp *qp,
> +			      int shrink_wqe)
>  {
>  	int s;
>  

Please use 'bool' and "true/false" for shrink_wqe.
Or Gerlitz May 3, 2016, 6:15 p.m. UTC | #8
On Tue, May 3, 2016, David Miller <davem@davemloft.net> wrote:
> From: Or Gerlitz <gerlitz.or@gmail.com>

>> The patch changes the driver to do single allocation for potentially
>> very large HW WQE descriptor buffers such as those used by the RDMA
>> (mlx5_ib) driver.

> I know exactly what this patch does and how, but thanks for trying to
> teach me what it does anyways.

Dave,

This was more of a thinking out loud, not trying to teach you anything.

As I said, we're aiming for net-next, and as such the re-spin to
address your comment on bool vs int will be targeted there.

Or.
diff mbox

Patch

diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index fd97534..842a6da 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -419,7 +419,8 @@  static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
 }
 
 static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
-			      enum mlx4_ib_qp_type type, struct mlx4_ib_qp *qp)
+			      enum mlx4_ib_qp_type type, struct mlx4_ib_qp *qp,
+			      int shrink_wqe)
 {
 	int s;
 
@@ -477,7 +478,7 @@  static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
 	 * We set WQE size to at least 64 bytes, this way stamping
 	 * invalidates each WQE.
 	 */
-	if (dev->dev->caps.fw_ver >= MLX4_FW_VER_WQE_CTRL_NEC &&
+	if (shrink_wqe && dev->dev->caps.fw_ver >= MLX4_FW_VER_WQE_CTRL_NEC &&
 	    qp->sq_signal_bits && BITS_PER_LONG == 64 &&
 	    type != MLX4_IB_QPT_SMI && type != MLX4_IB_QPT_GSI &&
 	    !(type & (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_PROXY_SMI |
@@ -642,6 +643,7 @@  static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
 {
 	int qpn;
 	int err;
+	struct ib_qp_cap backup_cap;
 	struct mlx4_ib_sqp *sqp;
 	struct mlx4_ib_qp *qp;
 	enum mlx4_ib_qp_type qp_type = (enum mlx4_ib_qp_type) init_attr->qp_type;
@@ -775,7 +777,8 @@  static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
 				goto err;
 		}
 
-		err = set_kernel_sq_size(dev, &init_attr->cap, qp_type, qp);
+		memcpy(&backup_cap, &init_attr->cap, sizeof(backup_cap));
+		err = set_kernel_sq_size(dev, &init_attr->cap, qp_type, qp, 1);
 		if (err)
 			goto err;
 
@@ -787,9 +790,20 @@  static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
 			*qp->db.db = 0;
 		}
 
-		if (mlx4_buf_alloc(dev->dev, qp->buf_size, PAGE_SIZE * 2, &qp->buf, gfp)) {
-			err = -ENOMEM;
-			goto err_db;
+		if (mlx4_buf_alloc(dev->dev, qp->buf_size, qp->buf_size,
+				   &qp->buf, gfp)) {
+			memcpy(&init_attr->cap, &backup_cap,
+			       sizeof(backup_cap));
+			err = set_kernel_sq_size(dev, &init_attr->cap, qp_type,
+						 qp, 0);
+			if (err)
+				goto err_db;
+
+			if (mlx4_buf_alloc(dev->dev, qp->buf_size,
+					   PAGE_SIZE * 2, &qp->buf, gfp)) {
+				err = -ENOMEM;
+				goto err_db;
+			}
 		}
 
 		err = mlx4_mtt_init(dev->dev, qp->buf.npages, qp->buf.page_shift,
diff --git a/drivers/net/ethernet/mellanox/mlx4/alloc.c b/drivers/net/ethernet/mellanox/mlx4/alloc.c
index 0c51c69..249a458 100644
--- a/drivers/net/ethernet/mellanox/mlx4/alloc.c
+++ b/drivers/net/ethernet/mellanox/mlx4/alloc.c
@@ -576,41 +576,48 @@  out:
 
 	return res;
 }
-/*
- * Handling for queue buffers -- we allocate a bunch of memory and
- * register it in a memory region at HCA virtual address 0.  If the
- * requested size is > max_direct, we split the allocation into
- * multiple pages, so we don't require too much contiguous memory.
- */
 
-int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
-		   struct mlx4_buf *buf, gfp_t gfp)
+static int mlx4_buf_direct_alloc(struct mlx4_dev *dev, int size,
+				 struct mlx4_buf *buf, gfp_t gfp)
 {
 	dma_addr_t t;
 
-	if (size <= max_direct) {
-		buf->nbufs        = 1;
-		buf->npages       = 1;
-		buf->page_shift   = get_order(size) + PAGE_SHIFT;
-		buf->direct.buf   = dma_alloc_coherent(&dev->persist->pdev->dev,
-						       size, &t, gfp);
-		if (!buf->direct.buf)
-			return -ENOMEM;
+	buf->nbufs        = 1;
+	buf->npages       = 1;
+	buf->page_shift   = get_order(size) + PAGE_SHIFT;
+	buf->direct.buf   =
+		dma_zalloc_coherent(&dev->persist->pdev->dev,
+				    size, &t, gfp);
+	if (!buf->direct.buf)
+		return -ENOMEM;
 
-		buf->direct.map = t;
+	buf->direct.map = t;
 
-		while (t & ((1 << buf->page_shift) - 1)) {
-			--buf->page_shift;
-			buf->npages *= 2;
-		}
+	while (t & ((1 << buf->page_shift) - 1)) {
+		--buf->page_shift;
+		buf->npages *= 2;
+	}
 
-		memset(buf->direct.buf, 0, size);
+	return 0;
+}
+
+/* Handling for queue buffers -- we allocate a bunch of memory and
+ * register it in a memory region at HCA virtual address 0. If the
+ *  requested size is > max_direct, we split the allocation into
+ *  multiple pages, so we don't require too much contiguous memory.
+ */
+int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
+		   struct mlx4_buf *buf, gfp_t gfp)
+{
+	if (size <= max_direct) {
+		return mlx4_buf_direct_alloc(dev, size, buf, gfp);
 	} else {
+		dma_addr_t t;
 		int i;
 
-		buf->direct.buf  = NULL;
-		buf->nbufs       = (size + PAGE_SIZE - 1) / PAGE_SIZE;
-		buf->npages      = buf->nbufs;
+		buf->direct.buf = NULL;
+		buf->nbufs	= (size + PAGE_SIZE - 1) / PAGE_SIZE;
+		buf->npages	= buf->nbufs;
 		buf->page_shift  = PAGE_SHIFT;
 		buf->page_list   = kcalloc(buf->nbufs, sizeof(*buf->page_list),
 					   gfp);
@@ -619,28 +626,12 @@  int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
 
 		for (i = 0; i < buf->nbufs; ++i) {
 			buf->page_list[i].buf =
-				dma_alloc_coherent(&dev->persist->pdev->dev,
-						   PAGE_SIZE,
-						   &t, gfp);
+				dma_zalloc_coherent(&dev->persist->pdev->dev,
+						    PAGE_SIZE, &t, gfp);
 			if (!buf->page_list[i].buf)
 				goto err_free;
 
 			buf->page_list[i].map = t;
-
-			memset(buf->page_list[i].buf, 0, PAGE_SIZE);
-		}
-
-		if (BITS_PER_LONG == 64) {
-			struct page **pages;
-			pages = kmalloc(sizeof *pages * buf->nbufs, gfp);
-			if (!pages)
-				goto err_free;
-			for (i = 0; i < buf->nbufs; ++i)
-				pages[i] = virt_to_page(buf->page_list[i].buf);
-			buf->direct.buf = vmap(pages, buf->nbufs, VM_MAP, PAGE_KERNEL);
-			kfree(pages);
-			if (!buf->direct.buf)
-				goto err_free;
 		}
 	}
 
@@ -655,15 +646,11 @@  EXPORT_SYMBOL_GPL(mlx4_buf_alloc);
 
 void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf)
 {
-	int i;
-
-	if (buf->nbufs == 1)
+	if (buf->nbufs == 1) {
 		dma_free_coherent(&dev->persist->pdev->dev, size,
-				  buf->direct.buf,
-				  buf->direct.map);
-	else {
-		if (BITS_PER_LONG == 64)
-			vunmap(buf->direct.buf);
+				  buf->direct.buf, buf->direct.map);
+	} else {
+		int i;
 
 		for (i = 0; i < buf->nbufs; ++i)
 			if (buf->page_list[i].buf)
@@ -789,7 +776,7 @@  void mlx4_db_free(struct mlx4_dev *dev, struct mlx4_db *db)
 EXPORT_SYMBOL_GPL(mlx4_db_free);
 
 int mlx4_alloc_hwq_res(struct mlx4_dev *dev, struct mlx4_hwq_resources *wqres,
-		       int size, int max_direct)
+		       int size)
 {
 	int err;
 
@@ -799,7 +786,7 @@  int mlx4_alloc_hwq_res(struct mlx4_dev *dev, struct mlx4_hwq_resources *wqres,
 
 	*wqres->db.db = 0;
 
-	err = mlx4_buf_alloc(dev, size, max_direct, &wqres->buf, GFP_KERNEL);
+	err = mlx4_buf_direct_alloc(dev, size, &wqres->buf, GFP_KERNEL);
 	if (err)
 		goto err_db;
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c
index af975a2..132cea6 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c
@@ -73,22 +73,16 @@  int mlx4_en_create_cq(struct mlx4_en_priv *priv,
 	 */
 	set_dev_node(&mdev->dev->persist->pdev->dev, node);
 	err = mlx4_alloc_hwq_res(mdev->dev, &cq->wqres,
-				cq->buf_size, 2 * PAGE_SIZE);
+				cq->buf_size);
 	set_dev_node(&mdev->dev->persist->pdev->dev, mdev->dev->numa_node);
 	if (err)
 		goto err_cq;
 
-	err = mlx4_en_map_buffer(&cq->wqres.buf);
-	if (err)
-		goto err_res;
-
 	cq->buf = (struct mlx4_cqe *)cq->wqres.buf.direct.buf;
 	*pcq = cq;
 
 	return 0;
 
-err_res:
-	mlx4_free_hwq_res(mdev->dev, &cq->wqres, cq->buf_size);
 err_cq:
 	kfree(cq);
 	*pcq = NULL;
@@ -177,7 +171,6 @@  void mlx4_en_destroy_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq **pcq)
 	struct mlx4_en_dev *mdev = priv->mdev;
 	struct mlx4_en_cq *cq = *pcq;
 
-	mlx4_en_unmap_buffer(&cq->wqres.buf);
 	mlx4_free_hwq_res(mdev->dev, &cq->wqres, cq->buf_size);
 	if (mlx4_is_eq_vector_valid(mdev->dev, priv->port, cq->vector) &&
 	    cq->is_tx == RX)
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index b4b258c..5b19178 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -2907,7 +2907,7 @@  int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
 
 	/* Allocate page for receive rings */
 	err = mlx4_alloc_hwq_res(mdev->dev, &priv->res,
-				MLX4_EN_PAGE_SIZE, MLX4_EN_PAGE_SIZE);
+				MLX4_EN_PAGE_SIZE);
 	if (err) {
 		en_err(priv, "Failed to allocate page for rx qps\n");
 		goto out;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_resources.c b/drivers/net/ethernet/mellanox/mlx4/en_resources.c
index 02e925d..a6b0db0 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_resources.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_resources.c
@@ -107,37 +107,6 @@  int mlx4_en_change_mcast_lb(struct mlx4_en_priv *priv, struct mlx4_qp *qp,
 	return ret;
 }
 
-int mlx4_en_map_buffer(struct mlx4_buf *buf)
-{
-	struct page **pages;
-	int i;
-
-	if (BITS_PER_LONG == 64 || buf->nbufs == 1)
-		return 0;
-
-	pages = kmalloc(sizeof *pages * buf->nbufs, GFP_KERNEL);
-	if (!pages)
-		return -ENOMEM;
-
-	for (i = 0; i < buf->nbufs; ++i)
-		pages[i] = virt_to_page(buf->page_list[i].buf);
-
-	buf->direct.buf = vmap(pages, buf->nbufs, VM_MAP, PAGE_KERNEL);
-	kfree(pages);
-	if (!buf->direct.buf)
-		return -ENOMEM;
-
-	return 0;
-}
-
-void mlx4_en_unmap_buffer(struct mlx4_buf *buf)
-{
-	if (BITS_PER_LONG == 64 || buf->nbufs == 1)
-		return;
-
-	vunmap(buf->direct.buf);
-}
-
 void mlx4_en_sqp_event(struct mlx4_qp *qp, enum mlx4_event event)
 {
     return;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index 7d25bc9..89775bb 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -394,17 +394,11 @@  int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
 
 	/* Allocate HW buffers on provided NUMA node */
 	set_dev_node(&mdev->dev->persist->pdev->dev, node);
-	err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres,
-				 ring->buf_size, 2 * PAGE_SIZE);
+	err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres, ring->buf_size);
 	set_dev_node(&mdev->dev->persist->pdev->dev, mdev->dev->numa_node);
 	if (err)
 		goto err_info;
 
-	err = mlx4_en_map_buffer(&ring->wqres.buf);
-	if (err) {
-		en_err(priv, "Failed to map RX buffer\n");
-		goto err_hwq;
-	}
 	ring->buf = ring->wqres.buf.direct.buf;
 
 	ring->hwtstamp_rx_filter = priv->hwtstamp_config.rx_filter;
@@ -412,8 +406,6 @@  int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
 	*pring = ring;
 	return 0;
 
-err_hwq:
-	mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size);
 err_info:
 	vfree(ring->rx_info);
 	ring->rx_info = NULL;
@@ -517,7 +509,6 @@  void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv,
 	struct mlx4_en_dev *mdev = priv->mdev;
 	struct mlx4_en_rx_ring *ring = *pring;
 
-	mlx4_en_unmap_buffer(&ring->wqres.buf);
 	mlx4_free_hwq_res(mdev->dev, &ring->wqres, size * stride + TXBB_SIZE);
 	vfree(ring->rx_info);
 	ring->rx_info = NULL;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index c0d7b72..b9ab646 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -93,20 +93,13 @@  int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
 
 	/* Allocate HW buffers on provided NUMA node */
 	set_dev_node(&mdev->dev->persist->pdev->dev, node);
-	err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres, ring->buf_size,
-				 2 * PAGE_SIZE);
+	err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres, ring->buf_size);
 	set_dev_node(&mdev->dev->persist->pdev->dev, mdev->dev->numa_node);
 	if (err) {
 		en_err(priv, "Failed allocating hwq resources\n");
 		goto err_bounce;
 	}
 
-	err = mlx4_en_map_buffer(&ring->wqres.buf);
-	if (err) {
-		en_err(priv, "Failed to map TX buffer\n");
-		goto err_hwq_res;
-	}
-
 	ring->buf = ring->wqres.buf.direct.buf;
 
 	en_dbg(DRV, priv, "Allocated TX ring (addr:%p) - buf:%p size:%d buf_size:%d dma:%llx\n",
@@ -117,7 +110,7 @@  int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
 				    MLX4_RESERVE_ETH_BF_QP);
 	if (err) {
 		en_err(priv, "failed reserving qp for TX ring\n");
-		goto err_map;
+		goto err_hwq_res;
 	}
 
 	err = mlx4_qp_alloc(mdev->dev, ring->qpn, &ring->qp, GFP_KERNEL);
@@ -154,8 +147,6 @@  int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
 
 err_reserve:
 	mlx4_qp_release_range(mdev->dev, ring->qpn, 1);
-err_map:
-	mlx4_en_unmap_buffer(&ring->wqres.buf);
 err_hwq_res:
 	mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size);
 err_bounce:
@@ -182,7 +173,6 @@  void mlx4_en_destroy_tx_ring(struct mlx4_en_priv *priv,
 	mlx4_qp_remove(mdev->dev, &ring->qp);
 	mlx4_qp_free(mdev->dev, &ring->qp);
 	mlx4_qp_release_range(priv->mdev->dev, ring->qpn, 1);
-	mlx4_en_unmap_buffer(&ring->wqres.buf);
 	mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size);
 	kfree(ring->bounce_buf);
 	ring->bounce_buf = NULL;
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index d12ab6a..a70e2d0 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -671,8 +671,6 @@  void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride,
 		int is_tx, int rss, int qpn, int cqn, int user_prio,
 		struct mlx4_qp_context *context);
 void mlx4_en_sqp_event(struct mlx4_qp *qp, enum mlx4_event event);
-int mlx4_en_map_buffer(struct mlx4_buf *buf);
-void mlx4_en_unmap_buffer(struct mlx4_buf *buf);
 int mlx4_en_change_mcast_lb(struct mlx4_en_priv *priv, struct mlx4_qp *qp,
 			    int loopback);
 void mlx4_en_calc_rx_buf(struct net_device *dev);
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 8541a91..72da65f 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -1051,7 +1051,7 @@  int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
 void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf);
 static inline void *mlx4_buf_offset(struct mlx4_buf *buf, int offset)
 {
-	if (BITS_PER_LONG == 64 || buf->nbufs == 1)
+	if (buf->nbufs == 1)
 		return buf->direct.buf + offset;
 	else
 		return buf->page_list[offset >> PAGE_SHIFT].buf +
@@ -1091,7 +1091,7 @@  int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order,
 void mlx4_db_free(struct mlx4_dev *dev, struct mlx4_db *db);
 
 int mlx4_alloc_hwq_res(struct mlx4_dev *dev, struct mlx4_hwq_resources *wqres,
-		       int size, int max_direct);
+		       int size);
 void mlx4_free_hwq_res(struct mlx4_dev *mdev, struct mlx4_hwq_resources *wqres,
 		       int size);