diff mbox series

[v3,01/20] lib/scatterlist: add flag for indicating P2PDMA segments in an SGL

Message ID 20210916234100.122368-2-logang@deltatee.com
State New
Headers show
Series Userspace P2PDMA with O_DIRECT NVMe devices | expand

Commit Message

Logan Gunthorpe Sept. 16, 2021, 11:40 p.m. UTC
Make use of the third free LSB in scatterlist's page_link on 64bit systems.

The extra bit will be used by dma_[un]map_sg_p2pdma() to determine when a
given SGL segments dma_address points to a PCI bus address.
dma_unmap_sg_p2pdma() will need to perform different cleanup when a
segment is marked as P2PDMA.

Using this bit requires adding an additional dependency on CONFIG_64BIT to
CONFIG_PCI_P2PDMA. This should be acceptable as the majority of P2PDMA
use cases are restricted to newer root complexes and roughly require the
extra address space for memory BARs used in the transactions.

Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Reviewed-by: John Hubbard <jhubbard@nvidia.com>
---
 drivers/pci/Kconfig         |  2 +-
 include/linux/scatterlist.h | 50 ++++++++++++++++++++++++++++++++++---
 2 files changed, 47 insertions(+), 5 deletions(-)

Comments

Jason Gunthorpe Sept. 28, 2021, 6:32 p.m. UTC | #1
On Thu, Sep 16, 2021 at 05:40:41PM -0600, Logan Gunthorpe wrote:
> Make use of the third free LSB in scatterlist's page_link on 64bit systems.
> 
> The extra bit will be used by dma_[un]map_sg_p2pdma() to determine when a
> given SGL segments dma_address points to a PCI bus address.
> dma_unmap_sg_p2pdma() will need to perform different cleanup when a
> segment is marked as P2PDMA.
> 
> Using this bit requires adding an additional dependency on CONFIG_64BIT to
> CONFIG_PCI_P2PDMA. This should be acceptable as the majority of P2PDMA
> use cases are restricted to newer root complexes and roughly require the
> extra address space for memory BARs used in the transactions.
> 
> Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
> Reviewed-by: John Hubbard <jhubbard@nvidia.com>
>  drivers/pci/Kconfig         |  2 +-
>  include/linux/scatterlist.h | 50 ++++++++++++++++++++++++++++++++++---
>  2 files changed, 47 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig
> index 0c473d75e625..90b4bddb3300 100644
> +++ b/drivers/pci/Kconfig
> @@ -163,7 +163,7 @@ config PCI_PASID
>  
>  config PCI_P2PDMA
>  	bool "PCI peer-to-peer transfer support"
> -	depends on ZONE_DEVICE
> +	depends on ZONE_DEVICE && 64BIT

Perhaps a comment to explain what the 64bit is doing?

>  	select GENERIC_ALLOCATOR
>  	help
>  	  Enableѕ drivers to do PCI peer-to-peer transactions to and from
> diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
> index 266754a55327..e62b1cf6386f 100644
> +++ b/include/linux/scatterlist.h
> @@ -64,6 +64,21 @@ struct sg_append_table {
>  #define SG_CHAIN	0x01UL
>  #define SG_END		0x02UL
>  
> +/*
> + * bit 2 is the third free bit in the page_link on 64bit systems which
> + * is used by dma_unmap_sg() to determine if the dma_address is a PCI
> + * bus address when doing P2PDMA.
> + * Note: CONFIG_PCI_P2PDMA depends on CONFIG_64BIT because of this.
> + */
> +
> +#ifdef CONFIG_PCI_P2PDMA
> +#define SG_DMA_PCI_P2PDMA	0x04UL

Add a 
	static_assert(__alignof__(void *) == 8);

?

> +#else
> +#define SG_DMA_PCI_P2PDMA	0x00UL
> +#endif
> +
> +#define SG_PAGE_LINK_MASK (SG_CHAIN | SG_END | SG_DMA_PCI_P2PDMA)
> +
>  /*
>   * We overload the LSB of the page pointer to indicate whether it's
>   * a valid sg entry, or whether it points to the start of a new scatterlist.
> @@ -72,7 +87,9 @@ struct sg_append_table {
>  #define sg_is_chain(sg)		((sg)->page_link & SG_CHAIN)
>  #define sg_is_last(sg)		((sg)->page_link & SG_END)
>  #define sg_chain_ptr(sg)	\
> -	((struct scatterlist *) ((sg)->page_link & ~(SG_CHAIN | SG_END)))
> +	((struct scatterlist *)((sg)->page_link & ~SG_PAGE_LINK_MASK))
> +
> +#define sg_is_dma_pci_p2pdma(sg) ((sg)->page_link & SG_DMA_PCI_P2PDMA)

I've been encouraging people to use static inlines more..

static inline unsigned int __sg_flags(struct scatterlist *sg)
{
	return sg->page_link & SG_PAGE_LINK_MASK;
}
static inline bool sg_is_chain(struct scatterlist *sg)
{
	return __sg_flags(sg) & SG_CHAIN;
}
static inline bool sg_is_last(struct scatterlist *sg)
{
	return __sg_flags(sg) & SG_END;
}
static inline bool sg_is_dma_pci_p2pdma(struct scatterlist *sg)
{
	return __sg_flags(sg) & SG_DMA_PCI_P2PDMA;
}

>  /**
>   * sg_assign_page - Assign a given page to an SG entry
> @@ -86,13 +103,13 @@ struct sg_append_table {
>   **/
>  static inline void sg_assign_page(struct scatterlist *sg, struct page *page)
>  {
> -	unsigned long page_link = sg->page_link & (SG_CHAIN | SG_END);
> +	unsigned long page_link = sg->page_link & SG_PAGE_LINK_MASK;

I think this should just be '& SG_END', sg_assign_page() doesn't look
like it should ever be used on a sg_chain entry, so this is just
trying to preserve the end stamp.

Anyway, this looks OK

Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>

Jason
Logan Gunthorpe Sept. 29, 2021, 9:15 p.m. UTC | #2
On 2021-09-28 12:32 p.m., Jason Gunthorpe wrote:
> On Thu, Sep 16, 2021 at 05:40:41PM -0600, Logan Gunthorpe wrote:
>>  config PCI_P2PDMA
>>  	bool "PCI peer-to-peer transfer support"
>> -	depends on ZONE_DEVICE
>> +	depends on ZONE_DEVICE && 64BIT
> 
> Perhaps a comment to explain what the 64bit is doing?

Added.

>>  	select GENERIC_ALLOCATOR
>>  	help
>>  	  Enableѕ drivers to do PCI peer-to-peer transactions to and from
>> diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
>> index 266754a55327..e62b1cf6386f 100644
>> +++ b/include/linux/scatterlist.h
>> @@ -64,6 +64,21 @@ struct sg_append_table {
>>  #define SG_CHAIN	0x01UL
>>  #define SG_END		0x02UL
>>  
>> +/*
>> + * bit 2 is the third free bit in the page_link on 64bit systems which
>> + * is used by dma_unmap_sg() to determine if the dma_address is a PCI
>> + * bus address when doing P2PDMA.
>> + * Note: CONFIG_PCI_P2PDMA depends on CONFIG_64BIT because of this.
>> + */
>> +
>> +#ifdef CONFIG_PCI_P2PDMA
>> +#define SG_DMA_PCI_P2PDMA	0x04UL
> 
> Add a 
> 	static_assert(__alignof__(void *) == 8);
> 
> ?

Good idea. Though, I think your line isn't quite correct. I've added:

static_assert(__alignof__(struct page) >= 8);

>> +#define sg_is_dma_pci_p2pdma(sg) ((sg)->page_link & SG_DMA_PCI_P2PDMA)
> 
> I've been encouraging people to use static inlines more..

I also prefer static inlines, but I usually follow the style of the code
I'm changing. In any case, I've changed to static inlines similar to
your example.

>>  /**
>>   * sg_assign_page - Assign a given page to an SG entry
>> @@ -86,13 +103,13 @@ struct sg_append_table {
>>   **/
>>  static inline void sg_assign_page(struct scatterlist *sg, struct page *page)
>>  {
>> -	unsigned long page_link = sg->page_link & (SG_CHAIN | SG_END);
>> +	unsigned long page_link = sg->page_link & SG_PAGE_LINK_MASK;
> 
> I think this should just be '& SG_END', sg_assign_page() doesn't look
> like it should ever be used on a sg_chain entry, so this is just
> trying to preserve the end stamp.

Perhaps, but I'm not comfortable making that change in this patch or
series. Though, I've reverted this specific change in my patch so
sg_assign_page() will clear SG_DMA_PCI_P2PDMA.

Logan
Chaitanya Kulkarni Sept. 30, 2021, 4:47 a.m. UTC | #3
Logan,

> +/*
> + * bit 2 is the third free bit in the page_link on 64bit systems which
> + * is used by dma_unmap_sg() to determine if the dma_address is a PCI
> + * bus address when doing P2PDMA.
> + * Note: CONFIG_PCI_P2PDMA depends on CONFIG_64BIT because of this.
> + */
> +
> +#ifdef CONFIG_PCI_P2PDMA
> +#define SG_DMA_PCI_P2PDMA      0x04UL
> +#else
> +#define SG_DMA_PCI_P2PDMA      0x00UL
> +#endif
> +
> +#define SG_PAGE_LINK_MASK (SG_CHAIN | SG_END | SG_DMA_PCI_P2PDMA)
> +

You are doing two things in one patch :-
1. Introducing a new macro to replace the current macros.
2. Adding a new member to those macros.

shouldn't this be split into two patches where you introduce a
macro SG_PAGE_LINK_MASK (SG_CHAIN | SG_END) in prep patch and
update the SF_PAGE_LINK_MASK with SG_DMA_PCI_P2PDMA with related
code?

OR

Is there a reason why it is not split ?

-ck
Chaitanya Kulkarni Sept. 30, 2021, 4:57 a.m. UTC | #4
> +/**
> + * sg_unmark_pci_p2pdma - Unmark the scatterlist entry for PCI p2pdma
> + * @sg:                 SG entryScatterlist
> + *
> + * Description:
> + *   Clears the PCI P2PDMA mark
> + **/
nit:- Probably want to add '.' above.
> +static inline void sg_dma_unmark_pci_p2pdma(struct scatterlist *sg)
> +{
> +       sg->page_link &= ~SG_DMA_PCI_P2PDMA;
> +}
> +
>   /**
>    * sg_phys - Return physical address of an sg entry
>    * @sg:             SG entry
> --
> 2.30.2
> 

either ways with or without split, looks good.

Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Logan Gunthorpe Sept. 30, 2021, 4:49 p.m. UTC | #5
On 2021-09-29 10:47 p.m., Chaitanya Kulkarni wrote:
> Logan,
> 
>> +/*
>> + * bit 2 is the third free bit in the page_link on 64bit systems which
>> + * is used by dma_unmap_sg() to determine if the dma_address is a PCI
>> + * bus address when doing P2PDMA.
>> + * Note: CONFIG_PCI_P2PDMA depends on CONFIG_64BIT because of this.
>> + */
>> +
>> +#ifdef CONFIG_PCI_P2PDMA
>> +#define SG_DMA_PCI_P2PDMA      0x04UL
>> +#else
>> +#define SG_DMA_PCI_P2PDMA      0x00UL
>> +#endif
>> +
>> +#define SG_PAGE_LINK_MASK (SG_CHAIN | SG_END | SG_DMA_PCI_P2PDMA)
>> +
> 
> You are doing two things in one patch :-
> 1. Introducing a new macro to replace the current macros.
> 2. Adding a new member to those macros.
> 
> shouldn't this be split into two patches where you introduce a
> macro SG_PAGE_LINK_MASK (SG_CHAIN | SG_END) in prep patch and
> update the SF_PAGE_LINK_MASK with SG_DMA_PCI_P2PDMA with related
> code?
> 

Ok, will split. I'll also add the static inline cleanup Jason suggested
in the first patch.

Logan
diff mbox series

Patch

diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig
index 0c473d75e625..90b4bddb3300 100644
--- a/drivers/pci/Kconfig
+++ b/drivers/pci/Kconfig
@@ -163,7 +163,7 @@  config PCI_PASID
 
 config PCI_P2PDMA
 	bool "PCI peer-to-peer transfer support"
-	depends on ZONE_DEVICE
+	depends on ZONE_DEVICE && 64BIT
 	select GENERIC_ALLOCATOR
 	help
 	  Enableѕ drivers to do PCI peer-to-peer transactions to and from
diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index 266754a55327..e62b1cf6386f 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -64,6 +64,21 @@  struct sg_append_table {
 #define SG_CHAIN	0x01UL
 #define SG_END		0x02UL
 
+/*
+ * bit 2 is the third free bit in the page_link on 64bit systems which
+ * is used by dma_unmap_sg() to determine if the dma_address is a PCI
+ * bus address when doing P2PDMA.
+ * Note: CONFIG_PCI_P2PDMA depends on CONFIG_64BIT because of this.
+ */
+
+#ifdef CONFIG_PCI_P2PDMA
+#define SG_DMA_PCI_P2PDMA	0x04UL
+#else
+#define SG_DMA_PCI_P2PDMA	0x00UL
+#endif
+
+#define SG_PAGE_LINK_MASK (SG_CHAIN | SG_END | SG_DMA_PCI_P2PDMA)
+
 /*
  * We overload the LSB of the page pointer to indicate whether it's
  * a valid sg entry, or whether it points to the start of a new scatterlist.
@@ -72,7 +87,9 @@  struct sg_append_table {
 #define sg_is_chain(sg)		((sg)->page_link & SG_CHAIN)
 #define sg_is_last(sg)		((sg)->page_link & SG_END)
 #define sg_chain_ptr(sg)	\
-	((struct scatterlist *) ((sg)->page_link & ~(SG_CHAIN | SG_END)))
+	((struct scatterlist *)((sg)->page_link & ~SG_PAGE_LINK_MASK))
+
+#define sg_is_dma_pci_p2pdma(sg) ((sg)->page_link & SG_DMA_PCI_P2PDMA)
 
 /**
  * sg_assign_page - Assign a given page to an SG entry
@@ -86,13 +103,13 @@  struct sg_append_table {
  **/
 static inline void sg_assign_page(struct scatterlist *sg, struct page *page)
 {
-	unsigned long page_link = sg->page_link & (SG_CHAIN | SG_END);
+	unsigned long page_link = sg->page_link & SG_PAGE_LINK_MASK;
 
 	/*
 	 * In order for the low bit stealing approach to work, pages
 	 * must be aligned at a 32-bit boundary as a minimum.
 	 */
-	BUG_ON((unsigned long) page & (SG_CHAIN | SG_END));
+	BUG_ON((unsigned long)page & SG_PAGE_LINK_MASK);
 #ifdef CONFIG_DEBUG_SG
 	BUG_ON(sg_is_chain(sg));
 #endif
@@ -126,7 +143,7 @@  static inline struct page *sg_page(struct scatterlist *sg)
 #ifdef CONFIG_DEBUG_SG
 	BUG_ON(sg_is_chain(sg));
 #endif
-	return (struct page *)((sg)->page_link & ~(SG_CHAIN | SG_END));
+	return (struct page *)((sg)->page_link & ~SG_PAGE_LINK_MASK);
 }
 
 /**
@@ -228,6 +245,31 @@  static inline void sg_unmark_end(struct scatterlist *sg)
 	sg->page_link &= ~SG_END;
 }
 
+/**
+ * sg_dma_mark_pci_p2pdma - Mark the scatterlist entry for PCI p2pdma
+ * @sg:		 SG entryScatterlist
+ *
+ * Description:
+ *   Marks the passed in sg entry to indicate that the dma_address is
+ *   a PCI bus address.
+ **/
+static inline void sg_dma_mark_pci_p2pdma(struct scatterlist *sg)
+{
+	sg->page_link |= SG_DMA_PCI_P2PDMA;
+}
+
+/**
+ * sg_unmark_pci_p2pdma - Unmark the scatterlist entry for PCI p2pdma
+ * @sg:		 SG entryScatterlist
+ *
+ * Description:
+ *   Clears the PCI P2PDMA mark
+ **/
+static inline void sg_dma_unmark_pci_p2pdma(struct scatterlist *sg)
+{
+	sg->page_link &= ~SG_DMA_PCI_P2PDMA;
+}
+
 /**
  * sg_phys - Return physical address of an sg entry
  * @sg:	     SG entry