diff mbox series

[v5,08/24] dma-direct: support PCI P2PDMA pages in dma-direct map_sg

Message ID 20220128002614.6136-9-logang@deltatee.com
State New
Headers show
Series Userspace P2PDMA with O_DIRECT NVMe devices | expand

Commit Message

Logan Gunthorpe Jan. 28, 2022, 12:25 a.m. UTC
Add PCI P2PDMA support for dma_direct_map_sg() so that it can map
PCI P2PDMA pages directly without a hack in the callers. This allows
for heterogeneous SGLs that contain both P2PDMA and regular pages.

Mapping a P2PDMA page has one of three possible outcomes:
  1) If the data path between the two devices doesn't go through the
     host bridge, the page is mapped with a PCI bus address.
  2) If the data path goes through the host bridge, the page is mapped
     normally, as though it were a CPU physical address.
  3) If the two devices cannot communicate at all, the mapping
     operation fails with -EREMOTEIO.

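SGL segments that contain PCI bus addresses are marked as DMA bus
addresses. On unmap, such segments (detected with
sg_is_dma_bus_address()) only have the mark cleared with
sg_dma_unmark_bus_address(); they are not passed to
dma_direct_unmap_page().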

Mapping a P2PDMA page also fails if swiotlb would have to be used for
the mapping: dma_direct_map_page() returns DMA_MAPPING_ERROR, which
dma_direct_map_sg() reports as -EIO.

Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
---
 kernel/dma/direct.c | 43 +++++++++++++++++++++++++++++++++++++------
 kernel/dma/direct.h |  7 ++++++-
 2 files changed, 43 insertions(+), 7 deletions(-)

Comments

Jonathan Derrick Feb. 1, 2022, 8:53 p.m. UTC | #1
On 1/27/2022 5:25 PM, Logan Gunthorpe wrote:
> Add PCI P2PDMA support for dma_direct_map_sg() so that it can map
> PCI P2PDMA pages directly without a hack in the callers. This allows
> for heterogeneous SGLs that contain both P2PDMA and regular pages.
> 
> A P2PDMA page may have three possible outcomes when being mapped:
>    1) If the data path between the two devices doesn't go through the
>       root port, then it should be mapped with a PCI bus address
>    2) If the data path goes through the host bridge, it should be mapped
>       normally, as though it were a CPU physical address
>    3) It is not possible for the two devices to communicate and thus
>       the mapping operation should fail (and it will return -EREMOTEIO).
> 
> SGL segments that contain PCI bus addresses are marked with
> sg_dma_mark_pci_p2pdma() and are ignored when unmapped.
> 
> P2PDMA mappings are also failed if swiotlb needs to be used on the
> mapping.
> 
> Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
> ---
>   kernel/dma/direct.c | 43 +++++++++++++++++++++++++++++++++++++------
>   kernel/dma/direct.h |  7 ++++++-
>   2 files changed, 43 insertions(+), 7 deletions(-)
> 
> diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
> index 50f48e9e4598..975df5f3aaf9 100644
> --- a/kernel/dma/direct.c
> +++ b/kernel/dma/direct.c
> @@ -461,29 +461,60 @@ void dma_direct_sync_sg_for_cpu(struct device *dev,
>   		arch_sync_dma_for_cpu_all();
>   }
>   
> +/*
> + * Unmaps segments, except for ones marked as pci_p2pdma which do not
> + * require any further action as they contain a bus address.
> + */
>   void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl,
>   		int nents, enum dma_data_direction dir, unsigned long attrs)
>   {
>   	struct scatterlist *sg;
>   	int i;
>   
> -	for_each_sg(sgl, sg, nents, i)
> -		dma_direct_unmap_page(dev, sg->dma_address, sg_dma_len(sg), dir,
> -			     attrs);
> +	for_each_sg(sgl,  sg, nents, i) {
> +		if (sg_is_dma_bus_address(sg))
> +			sg_dma_unmark_bus_address(sg);
> +		else
> +			dma_direct_unmap_page(dev, sg->dma_address,
> +					      sg_dma_len(sg), dir, attrs);
> +	}
>   }
>   #endif
>   
>   int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
>   		enum dma_data_direction dir, unsigned long attrs)
>   {
> -	int i;
> +	struct pci_p2pdma_map_state p2pdma_state = {};
> +	enum pci_p2pdma_map_type map;
>   	struct scatterlist *sg;
> +	int i, ret;
>   
>   	for_each_sg(sgl, sg, nents, i) {
> +		if (is_pci_p2pdma_page(sg_page(sg))) {
> +			map = pci_p2pdma_map_segment(&p2pdma_state, dev, sg);
> +			switch (map) {
> +			case PCI_P2PDMA_MAP_BUS_ADDR:
> +				continue;
> +			case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE:
> +				/*
> +				 * Any P2P mapping that traverses the PCI
> +				 * host bridge must be mapped with CPU physical
> +				 * address and not PCI bus addresses. This is
> +				 * done with dma_direct_map_page() below.
> +				 */
> +				break;
> +			default:
> +				ret = -EREMOTEIO;
> +				goto out_unmap;
> +			}
> +		}
I'm a little confused about this code. Would there be a case where the mapping needs
to be checked for each sg in the list? And if some sg in the sgl can be mapped
differently, would we want to continue checking the rest of the sg in the sgl instead
of breaking out of the loop completely?

> +
>   		sg->dma_address = dma_direct_map_page(dev, sg_page(sg),
>   				sg->offset, sg->length, dir, attrs);
> -		if (sg->dma_address == DMA_MAPPING_ERROR)
> +		if (sg->dma_address == DMA_MAPPING_ERROR) {
> +			ret = -EIO;
>   			goto out_unmap;
> +		}
>   		sg_dma_len(sg) = sg->length;
>   	}
>   
> @@ -491,7 +522,7 @@ int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
>   
>   out_unmap:
>   	dma_direct_unmap_sg(dev, sgl, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC);
> -	return -EIO;
> +	return ret;
>   }
>   
>   dma_addr_t dma_direct_map_resource(struct device *dev, phys_addr_t paddr,
> diff --git a/kernel/dma/direct.h b/kernel/dma/direct.h
> index 4632b0f4f72e..a33152d79069 100644
> --- a/kernel/dma/direct.h
> +++ b/kernel/dma/direct.h
> @@ -87,10 +87,15 @@ static inline dma_addr_t dma_direct_map_page(struct device *dev,
>   	phys_addr_t phys = page_to_phys(page) + offset;
>   	dma_addr_t dma_addr = phys_to_dma(dev, phys);
>   
> -	if (is_swiotlb_force_bounce(dev))
> +	if (is_swiotlb_force_bounce(dev)) {
> +		if (is_pci_p2pdma_page(page))
> +			return DMA_MAPPING_ERROR;
>   		return swiotlb_map(dev, phys, size, dir, attrs);
> +	}
>   
>   	if (unlikely(!dma_capable(dev, dma_addr, size, true))) {
> +		if (is_pci_p2pdma_page(page))
> +			return DMA_MAPPING_ERROR;
>   		if (swiotlb_force != SWIOTLB_NO_FORCE)
>   			return swiotlb_map(dev, phys, size, dir, attrs);
>
Logan Gunthorpe Feb. 1, 2022, 8:57 p.m. UTC | #2
On 2022-02-01 1:53 p.m., Jonathan Derrick wrote:
> 
> 
> On 1/27/2022 5:25 PM, Logan Gunthorpe wrote:
>> Add PCI P2PDMA support for dma_direct_map_sg() so that it can map
>> PCI P2PDMA pages directly without a hack in the callers. This allows
>> for heterogeneous SGLs that contain both P2PDMA and regular pages.
>>
>> A P2PDMA page may have three possible outcomes when being mapped:
>>    1) If the data path between the two devices doesn't go through the
>>       root port, then it should be mapped with a PCI bus address
>>    2) If the data path goes through the host bridge, it should be mapped
>>       normally, as though it were a CPU physical address
>>    3) It is not possible for the two devices to communicate and thus
>>       the mapping operation should fail (and it will return -EREMOTEIO).
>>
>> SGL segments that contain PCI bus addresses are marked with
>> sg_dma_mark_pci_p2pdma() and are ignored when unmapped.
>>
>> P2PDMA mappings are also failed if swiotlb needs to be used on the
>> mapping.
>>
>> Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
>> ---
>>   kernel/dma/direct.c | 43 +++++++++++++++++++++++++++++++++++++------
>>   kernel/dma/direct.h |  7 ++++++-
>>   2 files changed, 43 insertions(+), 7 deletions(-)
>>
>> diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
>> index 50f48e9e4598..975df5f3aaf9 100644
>> --- a/kernel/dma/direct.c
>> +++ b/kernel/dma/direct.c
>> @@ -461,29 +461,60 @@ void dma_direct_sync_sg_for_cpu(struct device *dev,
>>   		arch_sync_dma_for_cpu_all();
>>   }
>>   
>> +/*
>> + * Unmaps segments, except for ones marked as pci_p2pdma which do not
>> + * require any further action as they contain a bus address.
>> + */
>>   void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl,
>>   		int nents, enum dma_data_direction dir, unsigned long attrs)
>>   {
>>   	struct scatterlist *sg;
>>   	int i;
>>   
>> -	for_each_sg(sgl, sg, nents, i)
>> -		dma_direct_unmap_page(dev, sg->dma_address, sg_dma_len(sg), dir,
>> -			     attrs);
>> +	for_each_sg(sgl,  sg, nents, i) {
>> +		if (sg_is_dma_bus_address(sg))
>> +			sg_dma_unmark_bus_address(sg);
>> +		else
>> +			dma_direct_unmap_page(dev, sg->dma_address,
>> +					      sg_dma_len(sg), dir, attrs);
>> +	}
>>   }
>>   #endif
>>   
>>   int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
>>   		enum dma_data_direction dir, unsigned long attrs)
>>   {
>> -	int i;
>> +	struct pci_p2pdma_map_state p2pdma_state = {};
>> +	enum pci_p2pdma_map_type map;
>>   	struct scatterlist *sg;
>> +	int i, ret;
>>   
>>   	for_each_sg(sgl, sg, nents, i) {
>> +		if (is_pci_p2pdma_page(sg_page(sg))) {
>> +			map = pci_p2pdma_map_segment(&p2pdma_state, dev, sg);
>> +			switch (map) {
>> +			case PCI_P2PDMA_MAP_BUS_ADDR:
>> +				continue;
>> +			case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE:
>> +				/*
>> +				 * Any P2P mapping that traverses the PCI
>> +				 * host bridge must be mapped with CPU physical
>> +				 * address and not PCI bus addresses. This is
>> +				 * done with dma_direct_map_page() below.
>> +				 */
>> +				break;
>> +			default:
>> +				ret = -EREMOTEIO;
>> +				goto out_unmap;
>> +			}
>> +		}
> I'm a little confused about this code. Would there be a case where the mapping needs
> to be checked for each sg in the list? And if some sg in the sgl can be mapped
> differently, would we want to continue checking the rest of the sg in the sgl instead
> of breaking out of the loop completely?

Yes, the code supports heterogeneous SGLs with P2PDMA and regular
memory; it's also theoretically possible to mix P2PDMA memory for
different devices. So yes, the mapping must be checked for every SG in
the list. It can't just see one SG that points to P2PDMA memory and
assume the rest are all good.

Logan
diff mbox series

Patch

diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index 50f48e9e4598..975df5f3aaf9 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -461,29 +461,60 @@  void dma_direct_sync_sg_for_cpu(struct device *dev,
 		arch_sync_dma_for_cpu_all();
 }
 
+/*
+ * Unmaps segments, except for ones marked as pci_p2pdma which do not
+ * require any further action as they contain a bus address.
+ */
 void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl,
 		int nents, enum dma_data_direction dir, unsigned long attrs)
 {
 	struct scatterlist *sg;
 	int i;
 
-	for_each_sg(sgl, sg, nents, i)
-		dma_direct_unmap_page(dev, sg->dma_address, sg_dma_len(sg), dir,
-			     attrs);
+	for_each_sg(sgl,  sg, nents, i) {
+		if (sg_is_dma_bus_address(sg))
+			sg_dma_unmark_bus_address(sg);
+		else
+			dma_direct_unmap_page(dev, sg->dma_address,
+					      sg_dma_len(sg), dir, attrs);
+	}
 }
 #endif
 
 int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
 		enum dma_data_direction dir, unsigned long attrs)
 {
-	int i;
+	struct pci_p2pdma_map_state p2pdma_state = {};
+	enum pci_p2pdma_map_type map;
 	struct scatterlist *sg;
+	int i, ret;
 
 	for_each_sg(sgl, sg, nents, i) {
+		if (is_pci_p2pdma_page(sg_page(sg))) {
+			map = pci_p2pdma_map_segment(&p2pdma_state, dev, sg);
+			switch (map) {
+			case PCI_P2PDMA_MAP_BUS_ADDR:
+				continue;
+			case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE:
+				/*
+				 * Any P2P mapping that traverses the PCI
+				 * host bridge must be mapped with CPU physical
+				 * address and not PCI bus addresses. This is
+				 * done with dma_direct_map_page() below.
+				 */
+				break;
+			default:
+				ret = -EREMOTEIO;
+				goto out_unmap;
+			}
+		}
+
 		sg->dma_address = dma_direct_map_page(dev, sg_page(sg),
 				sg->offset, sg->length, dir, attrs);
-		if (sg->dma_address == DMA_MAPPING_ERROR)
+		if (sg->dma_address == DMA_MAPPING_ERROR) {
+			ret = -EIO;
 			goto out_unmap;
+		}
 		sg_dma_len(sg) = sg->length;
 	}
 
@@ -491,7 +522,7 @@  int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
 
 out_unmap:
 	dma_direct_unmap_sg(dev, sgl, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC);
-	return -EIO;
+	return ret;
 }
 
 dma_addr_t dma_direct_map_resource(struct device *dev, phys_addr_t paddr,
diff --git a/kernel/dma/direct.h b/kernel/dma/direct.h
index 4632b0f4f72e..a33152d79069 100644
--- a/kernel/dma/direct.h
+++ b/kernel/dma/direct.h
@@ -87,10 +87,15 @@  static inline dma_addr_t dma_direct_map_page(struct device *dev,
 	phys_addr_t phys = page_to_phys(page) + offset;
 	dma_addr_t dma_addr = phys_to_dma(dev, phys);
 
-	if (is_swiotlb_force_bounce(dev))
+	if (is_swiotlb_force_bounce(dev)) {
+		if (is_pci_p2pdma_page(page))
+			return DMA_MAPPING_ERROR;
 		return swiotlb_map(dev, phys, size, dir, attrs);
+	}
 
 	if (unlikely(!dma_capable(dev, dma_addr, size, true))) {
+		if (is_pci_p2pdma_page(page))
+			return DMA_MAPPING_ERROR;
 		if (swiotlb_force != SWIOTLB_NO_FORCE)
 			return swiotlb_map(dev, phys, size, dir, attrs);