diff mbox series

[2/2] powernv/npu: Add a debugfs setting to change ATSD threshold

Message ID 20180417091129.23069-2-alistair@popple.id.au (mailing list archive)
State Accepted
Commit 99c3ce33a00bc40cb218af770ef00c82c8044c36
Headers show
Series [1/2] powernv/npu: Do a PID GPU TLB flush when invalidating a large address range | expand

Commit Message

Alistair Popple April 17, 2018, 9:11 a.m. UTC
The threshold at which it becomes more efficient to coalesce a range of
ATSDs into a single per-PID ATSD is currently not well understood due to a
lack of real-world work loads. This patch adds a debugfs parameter allowing
the threshold to be altered at runtime in order to aid future development
and refinement of the value.

Signed-off-by: Alistair Popple <alistair@popple.id.au>
---
 arch/powerpc/platforms/powernv/npu-dma.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

Comments

Balbir Singh April 17, 2018, 9:45 p.m. UTC | #1
On Tue, 17 Apr 2018 19:11:29 +1000
Alistair Popple <alistair@popple.id.au> wrote:

> The threshold at which it becomes more efficient to coalesce a range of
> ATSDs into a single per-PID ATSD is currently not well understood due to a
> lack of real-world work loads. This patch adds a debugfs parameter allowing
> the threshold to be altered at runtime in order to aid future development
> and refinement of the value.
> 
> Signed-off-by: Alistair Popple <alistair@popple.id.au>
> ---
>  arch/powerpc/platforms/powernv/npu-dma.c | 12 ++++++++++--
>  1 file changed, 10 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
> index dc34662e9df9..a765bf576c14 100644
> --- a/arch/powerpc/platforms/powernv/npu-dma.c
> +++ b/arch/powerpc/platforms/powernv/npu-dma.c
> @@ -17,7 +17,9 @@
>  #include <linux/pci.h>
>  #include <linux/memblock.h>
>  #include <linux/iommu.h>
> +#include <linux/debugfs.h>
>  
> +#include <asm/debugfs.h>
>  #include <asm/tlb.h>
>  #include <asm/powernv.h>
>  #include <asm/reg.h>
> @@ -44,7 +46,8 @@ DEFINE_SPINLOCK(npu_context_lock);
>   * entire TLB on the GPU for the given PID rather than each specific address in
>   * the range.
>   */
> -#define ATSD_THRESHOLD (2*1024*1024)
> +static uint64_t atsd_threshold = 2 * 1024 * 1024;
> +static struct dentry *atsd_threshold_dentry;
>  
>  /*
>   * Other types of TCE cache invalidation are not functional in the
> @@ -682,7 +685,7 @@ static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
>  	struct npu_context *npu_context = mn_to_npu_context(mn);
>  	unsigned long address;
>  
> -	if (end - start > ATSD_THRESHOLD) {
> +	if (end - start > atsd_threshold) {
>  		/*
>  		 * Just invalidate the entire PID if the address range is too
>  		 * large.
> @@ -956,6 +959,11 @@ int pnv_npu2_init(struct pnv_phb *phb)
>  	static int npu_index;
>  	uint64_t rc = 0;
>  
> +	if (!atsd_threshold_dentry) {
> +		atsd_threshold_dentry = debugfs_create_x64("atsd_threshold",

Nit-picking can we call this atsd_threshold_in_bytes?

> +				   0600, powerpc_debugfs_root, &atsd_threshold);
> +	}
> +
>  	phb->npu.nmmu_flush =
>  		of_property_read_bool(phb->hose->dn, "ibm,nmmu-flush");
>  	for_each_child_of_node(phb->hose->dn, dn) {

Acked-by: Balbir Singh <bsingharora@gmail.com>
Michael Ellerman July 23, 2018, 3:11 p.m. UTC | #2
On Tue, 2018-04-17 at 09:11:29 UTC, Alistair Popple wrote:
> The threshold at which it becomes more efficient to coalesce a range of
> ATSDs into a single per-PID ATSD is currently not well understood due to a
> lack of real-world work loads. This patch adds a debugfs parameter allowing
> the threshold to be altered at runtime in order to aid future development
> and refinement of the value.
> 
> Signed-off-by: Alistair Popple <alistair@popple.id.au>
> Acked-by: Balbir Singh <bsingharora@gmail.com>

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/99c3ce33a00bc40cb218af770ef00c

cheers
diff mbox series

Patch

diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
index dc34662e9df9..a765bf576c14 100644
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -17,7 +17,9 @@ 
 #include <linux/pci.h>
 #include <linux/memblock.h>
 #include <linux/iommu.h>
+#include <linux/debugfs.h>
 
+#include <asm/debugfs.h>
 #include <asm/tlb.h>
 #include <asm/powernv.h>
 #include <asm/reg.h>
@@ -44,7 +46,8 @@  DEFINE_SPINLOCK(npu_context_lock);
  * entire TLB on the GPU for the given PID rather than each specific address in
  * the range.
  */
-#define ATSD_THRESHOLD (2*1024*1024)
+static uint64_t atsd_threshold = 2 * 1024 * 1024;
+static struct dentry *atsd_threshold_dentry;
 
 /*
  * Other types of TCE cache invalidation are not functional in the
@@ -682,7 +685,7 @@  static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
 	struct npu_context *npu_context = mn_to_npu_context(mn);
 	unsigned long address;
 
-	if (end - start > ATSD_THRESHOLD) {
+	if (end - start > atsd_threshold) {
 		/*
 		 * Just invalidate the entire PID if the address range is too
 		 * large.
@@ -956,6 +959,11 @@  int pnv_npu2_init(struct pnv_phb *phb)
 	static int npu_index;
 	uint64_t rc = 0;
 
+	if (!atsd_threshold_dentry) {
+		atsd_threshold_dentry = debugfs_create_x64("atsd_threshold",
+				   0600, powerpc_debugfs_root, &atsd_threshold);
+	}
+
 	phb->npu.nmmu_flush =
 		of_property_read_bool(phb->hose->dn, "ibm,nmmu-flush");
 	for_each_child_of_node(phb->hose->dn, dn) {