[6/9] swiotlb: refactor swiotlb_tbl_map_single

Message ID 20210921012836.25550-7-khalid.elmously@canonical.com
State New
Series Fix nvme errors w/ swiotlb in confidential VMs

Commit Message

Khalid Elmously Sept. 21, 2021, 1:28 a.m. UTC
From: Christoph Hellwig <hch@lst.de>

BugLink: https://bugs.launchpad.net/bugs/1943902

Split out a bunch of self-contained helpers to make the function easier
to follow.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Jianxiong Gao <jxgao@google.com>
Tested-by: Jianxiong Gao <jxgao@google.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
(backported picked from 26a7e094783d482f3e125f09945a5bb1d867b2e6)
[ kmously: the different implementation of swiotlb_tbl_map_single() required
 manual removal. Also used __phys_to_dma() instead of
 phys_to_dma_unencrypted() in find_slots ]
Signed-off-by: Khalid Elmously <khalid.elmously@canonical.com>
---
 kernel/dma/swiotlb.c | 184 +++++++++++++++++++++----------------------
 1 file changed, 91 insertions(+), 93 deletions(-)

Comments

Kleber Sacilotto de Souza Sept. 21, 2021, 8:43 a.m. UTC | #1
On 21.09.21 03:28, Khalid Elmously wrote:
> From: Christoph Hellwig <hch@lst.de>
> 
> BugLink: https://bugs.launchpad.net/bugs/1943902
> 
> Split out a bunch of a self-contained helpers to make the function easier
> to follow.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> Acked-by: Jianxiong Gao <jxgao@google.com>
> Tested-by: Jianxiong Gao <jxgao@google.com>
> Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
> (backported picked from 26a7e094783d482f3e125f09945a5bb1d867b2e6)

s/picked//

> [ kmously: different implementation swiotlb_tbl_map_single() required
>   manual removal. Also used __phys_to_dma() instead of
> phys_to_dma_unencrypted() in find_slots ]
> Signed-off-by: Khalid Elmously <khalid.elmously@canonical.com>
Kleber Sacilotto de Souza Sept. 21, 2021, 9:48 a.m. UTC | #2
On 21.09.21 03:28, Khalid Elmously wrote:
> [...]
> +phys_addr_t swiotlb_tbl_map_single(struct device *dev, dma_addr_t dma_addr,

With this refactoring, the second parameter of this function (named tbl_dma_addr
in the original function and dma_addr here) is not used anymore; the address is now
calculated in find_slots(). This means that all callers of this function are still
passing a parameter that is no longer needed.

Should we cherry-pick/backport fc0021aa340af65a0a37d77be39e22aa886a6132 ("swiotlb:
remove the tbl_dma_addr argument to swiotlb_tbl_map_single") to make this backport
cleaner and more consistent?
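
For reference, a rough sketch (not the exact upstream hunk) of the simplified
prototype after dropping the unused argument, with callers updated to match:

	phys_addr_t swiotlb_tbl_map_single(struct device *dev,
					   phys_addr_t orig_addr,
					   size_t mapping_size,
					   size_t alloc_size,
					   enum dma_data_direction dir,
					   unsigned long attrs);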

> +				phys_addr_t orig_addr, size_t mapping_size,
> +				size_t alloc_size,
> +				enum dma_data_direction dir,
> +				unsigned long attrs)
> +{
> +	unsigned int index, i;
> +	phys_addr_t tlb_addr;
> +
> +	if (no_iotlb_memory)
> +		panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer");
> +
> +	if (mem_encrypt_active())
> +		pr_warn_once("Memory encryption is active and system is using DMA bounce buffers\n");
> +
> +	if (mapping_size > alloc_size) {
> +		dev_warn_once(dev, "Invalid sizes (mapping: %zd bytes, alloc: %zd bytes)",
> +			      mapping_size, alloc_size);
> +		return (phys_addr_t)DMA_MAPPING_ERROR;
> +	}
> +
> +	index = find_slots(dev, alloc_size);
> +	if (index == -1) {

index is defined as unsigned int, so this check is broken.

This has been fixed by 95b079d8215b83b37fa59341fda92fcb9392f14a ("swiotlb: Fix the type of index").
I think we should include this fixup in this patchset.
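
For reference, a minimal sketch of that fixup as it would apply to the
declaration used in this backport (following the upstream fix, which gives
index a signed type; the exact hunk may differ):

	-	unsigned int index, i;
	+	unsigned int i;
	+	int index;	/* find_slots() returns -1 on failure */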


Patch

diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index af22c3c5e488c..5a0d9d4864aca 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -453,137 +453,135 @@  static void swiotlb_bounce(phys_addr_t orig_addr, phys_addr_t tlb_addr,
 	}
 }
 
-phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
-				   dma_addr_t tbl_dma_addr,
-				   phys_addr_t orig_addr,
-				   size_t mapping_size,
-				   size_t alloc_size,
-				   enum dma_data_direction dir,
-				   unsigned long attrs)
-{
-	unsigned long flags;
-	phys_addr_t tlb_addr;
-	unsigned int nslots, stride, index, wrap;
-	int i;
-	unsigned long mask;
-	unsigned long offset_slots;
-	unsigned long max_slots;
-	unsigned long tmp_io_tlb_used;
-
-	if (no_iotlb_memory)
-		panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer");
-
-	if (mem_encrypt_active())
-		pr_warn_once("Memory encryption is active and system is using DMA bounce buffers\n");
+#define slot_addr(start, idx)	((start) + ((idx) << IO_TLB_SHIFT))
 
-	if (mapping_size > alloc_size) {
-		dev_warn_once(hwdev, "Invalid sizes (mapping: %zd bytes, alloc: %zd bytes)",
-			      mapping_size, alloc_size);
-		return (phys_addr_t)DMA_MAPPING_ERROR;
-	}
-
-	mask = dma_get_seg_boundary(hwdev);
+/*
+ * Carefully handle integer overflow which can occur when boundary_mask == ~0UL.
+ */
+static inline unsigned long get_max_slots(unsigned long boundary_mask)
+{
+	if (boundary_mask == ~0UL)
+		return 1UL << (BITS_PER_LONG - IO_TLB_SHIFT);
+	return nr_slots(boundary_mask + 1);
+}
 
-	tbl_dma_addr &= mask;
+static unsigned int wrap_index(unsigned int index)
+{
+	if (index >= io_tlb_nslabs)
+		return 0;
+	return index;
+}
 
-	offset_slots = nr_slots(tbl_dma_addr);
+/*
+ * Find a suitable number of IO TLB entries size that will fit this request and
+ * allocate a buffer from that IO TLB pool.
+ */
+static int find_slots(struct device *dev, size_t alloc_size)
+{
+	unsigned long boundary_mask = dma_get_seg_boundary(dev);
+	dma_addr_t tbl_dma_addr =
+		__phys_to_dma(dev, io_tlb_start) & boundary_mask;
+	unsigned long max_slots = get_max_slots(boundary_mask);
+	unsigned int nslots = nr_slots(alloc_size), stride = 1;
+	unsigned int index, wrap, count = 0, i;
+	unsigned long flags;
 
-	/*
-	 * Carefully handle integer overflow which can occur when mask == ~0UL.
-	 */
-	max_slots = mask + 1
-		    ? nr_slots(mask + 1)
-		    : 1UL << (BITS_PER_LONG - IO_TLB_SHIFT);
+	BUG_ON(!nslots);
 
 	/*
 	 * For mappings greater than or equal to a page, we limit the stride
 	 * (and hence alignment) to a page size.
 	 */
-	nslots = nr_slots(alloc_size);
 	if (alloc_size >= PAGE_SIZE)
-		stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT));
-	else
-		stride = 1;
-
-	BUG_ON(!nslots);
+		stride <<= (PAGE_SHIFT - IO_TLB_SHIFT);
 
-	/*
-	 * Find suitable number of IO TLB entries size that will fit this
-	 * request and allocate a buffer from that IO TLB pool.
-	 */
 	spin_lock_irqsave(&io_tlb_lock, flags);
-
 	if (unlikely(nslots > io_tlb_nslabs - io_tlb_used))
 		goto not_found;
 
-	index = ALIGN(io_tlb_index, stride);
-	if (index >= io_tlb_nslabs)
-		index = 0;
-	wrap = index;
-
+	index = wrap = wrap_index(ALIGN(io_tlb_index, stride));
 	do {
-		while (iommu_is_span_boundary(index, nslots, offset_slots,
-					      max_slots)) {
-			index += stride;
-			if (index >= io_tlb_nslabs)
-				index = 0;
-			if (index == wrap)
-				goto not_found;
-		}
-
 		/*
 		 * If we find a slot that indicates we have 'nslots' number of
 		 * contiguous buffers, we allocate the buffers from that slot
 		 * and mark the entries as '0' indicating unavailable.
 		 */
-		if (io_tlb_list[index] >= nslots) {
-			int count = 0;
-
-			for (i = index; i < (int) (index + nslots); i++)
-				io_tlb_list[i] = 0;
-			for (i = index - 1;
-			     io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 &&
-			     io_tlb_list[i]; i--)
-				io_tlb_list[i] = ++count;
-			tlb_addr = io_tlb_start + (index << IO_TLB_SHIFT);
-
-			/*
-			 * Update the indices to avoid searching in the next
-			 * round.
-			 */
-			io_tlb_index = ((index + nslots) < io_tlb_nslabs
-					? (index + nslots) : 0);
-
-			goto found;
+		if (!iommu_is_span_boundary(index, nslots,
+					    nr_slots(tbl_dma_addr),
+					    max_slots)) {
+			if (io_tlb_list[index] >= nslots)
+				goto found;
 		}
-		index += stride;
-		if (index >= io_tlb_nslabs)
-			index = 0;
+		index = wrap_index(index + stride);
 	} while (index != wrap);
 
 not_found:
-	tmp_io_tlb_used = io_tlb_used;
-
 	spin_unlock_irqrestore(&io_tlb_lock, flags);
-	if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit())
-		dev_warn(hwdev, "swiotlb buffer is full (sz: %zd bytes), total %lu (slots), used %lu (slots)\n",
-			 alloc_size, io_tlb_nslabs, tmp_io_tlb_used);
-	return (phys_addr_t)DMA_MAPPING_ERROR;
+	return -1;
+
 found:
+	for (i = index; i < index + nslots; i++)
+		io_tlb_list[i] = 0;
+	for (i = index - 1;
+	     io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 &&
+	     io_tlb_list[i]; i--)
+		io_tlb_list[i] = ++count;
+
+	/*
+	 * Update the indices to avoid searching in the next round.
+	 */
+	if (index + nslots < io_tlb_nslabs)
+		io_tlb_index = index + nslots;
+	else
+		io_tlb_index = 0;
 	io_tlb_used += nslots;
+
 	spin_unlock_irqrestore(&io_tlb_lock, flags);
+	return index;
+}
+
+phys_addr_t swiotlb_tbl_map_single(struct device *dev, dma_addr_t dma_addr,
+				phys_addr_t orig_addr, size_t mapping_size,
+				size_t alloc_size,
+				enum dma_data_direction dir,
+				unsigned long attrs)
+{
+	unsigned int index, i;
+	phys_addr_t tlb_addr;
+
+	if (no_iotlb_memory)
+		panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer");
+
+	if (mem_encrypt_active())
+		pr_warn_once("Memory encryption is active and system is using DMA bounce buffers\n");
+
+	if (mapping_size > alloc_size) {
+		dev_warn_once(dev, "Invalid sizes (mapping: %zd bytes, alloc: %zd bytes)",
+			      mapping_size, alloc_size);
+		return (phys_addr_t)DMA_MAPPING_ERROR;
+	}
+
+	index = find_slots(dev, alloc_size);
+	if (index == -1) {
+		if (!(attrs & DMA_ATTR_NO_WARN))
+			dev_warn_ratelimited(dev,
+	"swiotlb buffer is full (sz: %zd bytes), total %lu (slots), used %lu (slots)\n",
+				 alloc_size, io_tlb_nslabs, io_tlb_used);
+		return (phys_addr_t)DMA_MAPPING_ERROR;
+	}
 
 	/*
 	 * Save away the mapping from the original address to the DMA address.
 	 * This is needed when we sync the memory.  Then we sync the buffer if
 	 * needed.
 	 */
-	for (i = 0; i < nslots; i++)
-		io_tlb_orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT);
+	for (i = 0; i < nr_slots(alloc_size); i++)
+		io_tlb_orig_addr[index + i] = slot_addr(orig_addr, i);
+
+	tlb_addr = slot_addr(io_tlb_start, index);
 	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
 	    (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
 		swiotlb_bounce(orig_addr, tlb_addr, mapping_size, DMA_TO_DEVICE);
-
 	return tlb_addr;
 }