Message ID | 20171214011008.4388-1-bsingharora@gmail.com (mailing list archive) |
---|---|
State | Changes Requested |
Headers | show |
Series | [v2] powerpc/npu: Cleanup MMIO ATSD flushing | expand |
Thanks Balbir, one question below. I have no way of testing this at present but it looks ok to me. Thanks! On Thursday, 14 December 2017 12:10:08 PM AEDT Balbir Singh wrote: > While reviewing the code I found that the flush assumes all > pages are of mmu_linear_psize, which is not correct. The patch > uses find_linux_pte to find the right page size and uses that > for launching the ATSD invalidation. A new helper is added > to abstract the invalidation from the various notifiers. > > The patch also cleans up a bit by removing AP size from PID > flushes. > > Signed-off-by: Balbir Singh <bsingharora@gmail.com> > --- > > Changelog: > Refactor the handling of return values from find_linux_pte > as suggested by Aneesh > > arch/powerpc/platforms/powernv/npu-dma.c | 55 ++++++++++++++++++++++---------- > 1 file changed, 38 insertions(+), 17 deletions(-) > > diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c > index f6cbc1a71472..e8caa3e2019d 100644 > --- a/arch/powerpc/platforms/powernv/npu-dma.c > +++ b/arch/powerpc/platforms/powernv/npu-dma.c > @@ -17,6 +17,7 @@ > #include <linux/pci.h> > #include <linux/memblock.h> > #include <linux/iommu.h> > +#include <linux/huge_mm.h> > > #include <asm/tlb.h> > #include <asm/powernv.h> > @@ -27,6 +28,7 @@ > #include <asm/pnv-pci.h> > #include <asm/msi_bitmap.h> > #include <asm/opal.h> > +#include <asm/pte-walk.h> > > #include "powernv.h" > #include "pci.h" > @@ -460,9 +462,6 @@ static int mmio_invalidate_pid(struct npu *npu, unsigned long pid, bool flush) > /* PRS set to process-scoped */ > launch |= PPC_BIT(13); > > - /* AP */ > - launch |= (u64) mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17); > > /* PID */ > launch |= pid << PPC_BITLSHIFT(38); > > @@ -474,7 +473,8 @@ static int mmio_invalidate_pid(struct npu *npu, unsigned long pid, bool flush) > } > > static int mmio_invalidate_va(struct npu *npu, unsigned long va, > - unsigned long pid, bool flush) > + unsigned long pid, bool flush, > + unsigned int shift) > { > unsigned long launch; > > @@ -485,9 +485,8 @@ static int mmio_invalidate_va(struct npu *npu, unsigned long va, > launch |= PPC_BIT(13); > > /* AP */ > - launch |= (u64) mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17); > + launch |= (u64) mmu_get_ap(shift) << PPC_BITLSHIFT(17); > > - /* PID */ > launch |= pid << PPC_BITLSHIFT(38); > > /* No flush */ > @@ -504,7 +503,8 @@ struct mmio_atsd_reg { > }; > > static void mmio_invalidate_wait( > - struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], bool flush) > + struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], bool flush, > + unsigned int shift) > { > struct npu *npu; > int i, reg; > @@ -537,7 +537,8 @@ static void mmio_invalidate_wait( > * the value of va. > */ > static void mmio_invalidate(struct npu_context *npu_context, int va, > - unsigned long address, bool flush) > + unsigned long address, bool flush, > + unsigned int shift) > { > int i, j; > struct npu *npu; > @@ -572,7 +573,7 @@ static void mmio_invalidate(struct npu_context *npu_context, int va, > if (va) > mmio_atsd_reg[i].reg = > mmio_invalidate_va(npu, address, pid, > - flush); > + flush, shift); > else > mmio_atsd_reg[i].reg = > mmio_invalidate_pid(npu, pid, flush); > @@ -585,10 +586,32 @@ static void mmio_invalidate(struct npu_context *npu_context, int va, > } > } > > - mmio_invalidate_wait(mmio_atsd_reg, flush); > + mmio_invalidate_wait(mmio_atsd_reg, flush, shift); > if (flush) > /* Wait for the flush to complete */ > - mmio_invalidate_wait(mmio_atsd_reg, false); > + mmio_invalidate_wait(mmio_atsd_reg, false, shift); > +} > + > +static void pnv_npu2_invalidate_helper(struct npu_context *npu_context, > + struct mm_struct *mm, unsigned long start, > + unsigned long end, bool flush) > +{ > + unsigned long address; > + unsigned int hshift = 0, shift; > + > + address = start; > + do { > + local_irq_disable(); > + find_linux_pte(mm->pgd, address, NULL, &hshift); > + if (hshift) > + shift = hshift; > + else > + shift = PAGE_SHIFT; Looks better, thanks! Also in future we might be able to futher optimise this as I don't think the shift needs to match the actual page size as we are directly issuing ATSDs to the GPU. ie. we could bump shift to cover the whole of (start, end) or to invalidate larger chunks at a time - I don't think we need to limit it to PAGE_SHIFT. > + mmio_invalidate(npu_context, address > 0, address, flush, > + shift); If address == 0 we end up invalidating the entire PID rather than just the page at address 0. Probably not a big issue though as I'm guessing we wouldn't actually see invalidations for the page@0 very often? > + local_irq_enable(); > + address += (1ull << shift); > + } while (address < end); > } > > static void pnv_npu2_mn_release(struct mmu_notifier *mn, > @@ -604,7 +627,7 @@ static void pnv_npu2_mn_release(struct mmu_notifier *mn, > * There should be no more translation requests for this PID, but we > * need to ensure any entries for it are removed from the TLB. > */ > - mmio_invalidate(npu_context, 0, 0, true); > + pnv_npu2_invalidate_helper(npu_context, mm, 0, PAGE_SIZE, true); > } > > static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn, > @@ -614,7 +637,7 @@ static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn, > { > struct npu_context *npu_context = mn_to_npu_context(mn); > > - mmio_invalidate(npu_context, 1, address, true); > + pnv_npu2_invalidate_helper(npu_context, mm, address, address, true); > } > > static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn, > @@ -622,13 +645,11 @@ static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn, > unsigned long start, unsigned long end) > { > struct npu_context *npu_context = mn_to_npu_context(mn); > - unsigned long address; > > - for (address = start; address < end; address += PAGE_SIZE) > - mmio_invalidate(npu_context, 1, address, false); > + pnv_npu2_invalidate_helper(npu_context, mm, start, end, false); > > /* Do the flush only on the final addess == end */ > - mmio_invalidate(npu_context, 1, address, true); > + pnv_npu2_invalidate_helper(npu_context, mm, end, end, true); > } > > static const struct mmu_notifier_ops nv_nmmu_notifier_ops = { >
On Tuesday, 16 January 2018 3:15:05 PM AEDT Alistair Popple wrote: > Thanks Balbir, one question below. I have no way of testing this at present but > it looks ok to me. Thanks! The below are more future optimisations once we can test. So in the meantime: Acked-by: Alistair Popple <alistair@popple.id.au> > On Thursday, 14 December 2017 12:10:08 PM AEDT Balbir Singh wrote: > > While reviewing the code I found that the flush assumes all > > pages are of mmu_linear_psize, which is not correct. The patch > > uses find_linux_pte to find the right page size and uses that > > for launching the ATSD invalidation. A new helper is added > > to abstract the invalidation from the various notifiers. > > > > The patch also cleans up a bit by removing AP size from PID > > flushes. > > > > Signed-off-by: Balbir Singh <bsingharora@gmail.com> > > --- > > > > Changelog: > > Refactor the handling of return values from find_linux_pte > > as suggested by Aneesh > > > > arch/powerpc/platforms/powernv/npu-dma.c | 55 ++++++++++++++++++++++---------- > > 1 file changed, 38 insertions(+), 17 deletions(-) > > > > diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c > > index f6cbc1a71472..e8caa3e2019d 100644 > > --- a/arch/powerpc/platforms/powernv/npu-dma.c > > +++ b/arch/powerpc/platforms/powernv/npu-dma.c > > @@ -17,6 +17,7 @@ > > #include <linux/pci.h> > > #include <linux/memblock.h> > > #include <linux/iommu.h> > > +#include <linux/huge_mm.h> > > > > #include <asm/tlb.h> > > #include <asm/powernv.h> > > @@ -27,6 +28,7 @@ > > #include <asm/pnv-pci.h> > > #include <asm/msi_bitmap.h> > > #include <asm/opal.h> > > +#include <asm/pte-walk.h> > > > > #include "powernv.h" > > #include "pci.h" > > @@ -460,9 +462,6 @@ static int mmio_invalidate_pid(struct npu *npu, unsigned long pid, bool flush) > > /* PRS set to process-scoped */ > > launch |= PPC_BIT(13); > > > > - /* AP */ > > - launch |= (u64) mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17); > > > > /* PID */ > > launch |= pid << PPC_BITLSHIFT(38); > > > > @@ -474,7 +473,8 @@ static int mmio_invalidate_pid(struct npu *npu, unsigned long pid, bool flush) > > } > > > > static int mmio_invalidate_va(struct npu *npu, unsigned long va, > > - unsigned long pid, bool flush) > > + unsigned long pid, bool flush, > > + unsigned int shift) > > { > > unsigned long launch; > > > > @@ -485,9 +485,8 @@ static int mmio_invalidate_va(struct npu *npu, unsigned long va, > > launch |= PPC_BIT(13); > > > > /* AP */ > > - launch |= (u64) mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17); > > + launch |= (u64) mmu_get_ap(shift) << PPC_BITLSHIFT(17); > > > > - /* PID */ > > launch |= pid << PPC_BITLSHIFT(38); > > > > /* No flush */ > > @@ -504,7 +503,8 @@ struct mmio_atsd_reg { > > }; > > > > static void mmio_invalidate_wait( > > - struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], bool flush) > > + struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], bool flush, > > + unsigned int shift) > > { > > struct npu *npu; > > int i, reg; > > @@ -537,7 +537,8 @@ static void mmio_invalidate_wait( > > * the value of va. > > */ > > static void mmio_invalidate(struct npu_context *npu_context, int va, > > - unsigned long address, bool flush) > > + unsigned long address, bool flush, > > + unsigned int shift) > > { > > int i, j; > > struct npu *npu; > > @@ -572,7 +573,7 @@ static void mmio_invalidate(struct npu_context *npu_context, int va, > > if (va) > > mmio_atsd_reg[i].reg = > > mmio_invalidate_va(npu, address, pid, > > - flush); > > + flush, shift); > > else > > mmio_atsd_reg[i].reg = > > mmio_invalidate_pid(npu, pid, flush); > > @@ -585,10 +586,32 @@ static void mmio_invalidate(struct npu_context *npu_context, int va, > > } > > } > > > > - mmio_invalidate_wait(mmio_atsd_reg, flush); > > + mmio_invalidate_wait(mmio_atsd_reg, flush, shift); > > if (flush) > > /* Wait for the flush to complete */ > > - mmio_invalidate_wait(mmio_atsd_reg, false); > > + mmio_invalidate_wait(mmio_atsd_reg, false, shift); > > +} > > + > > +static void pnv_npu2_invalidate_helper(struct npu_context *npu_context, > > + struct mm_struct *mm, unsigned long start, > > + unsigned long end, bool flush) > > +{ > > + unsigned long address; > > + unsigned int hshift = 0, shift; > > + > > + address = start; > > + do { > > + local_irq_disable(); > > + find_linux_pte(mm->pgd, address, NULL, &hshift); > > + if (hshift) > > + shift = hshift; > > + else > > + shift = PAGE_SHIFT; > > Looks better, thanks! > > Also in future we might be able to futher optimise this as I don't think the > shift needs to match the actual page size as we are directly issuing ATSDs to > the GPU. ie. we could bump shift to cover the whole of (start, end) or to > invalidate larger chunks at a time - I don't think we need to limit it to > PAGE_SHIFT. > > > + mmio_invalidate(npu_context, address > 0, address, flush, > > + shift); > > If address == 0 we end up invalidating the entire PID rather than just the page > at address 0. Probably not a big issue though as I'm guessing we wouldn't > actually see invalidations for the page@0 very often? > > > + local_irq_enable(); > > + address += (1ull << shift); > > + } while (address < end); > > } > > > > static void pnv_npu2_mn_release(struct mmu_notifier *mn, > > @@ -604,7 +627,7 @@ static void pnv_npu2_mn_release(struct mmu_notifier *mn, > > * There should be no more translation requests for this PID, but we > > * need to ensure any entries for it are removed from the TLB. > > */ > > - mmio_invalidate(npu_context, 0, 0, true); > > + pnv_npu2_invalidate_helper(npu_context, mm, 0, PAGE_SIZE, true); > > } > > > > static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn, > > @@ -614,7 +637,7 @@ static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn, > > { > > struct npu_context *npu_context = mn_to_npu_context(mn); > > > > - mmio_invalidate(npu_context, 1, address, true); > > + pnv_npu2_invalidate_helper(npu_context, mm, address, address, true); > > } > > > > static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn, > > @@ -622,13 +645,11 @@ static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn, > > unsigned long start, unsigned long end) > > { > > struct npu_context *npu_context = mn_to_npu_context(mn); > > - unsigned long address; > > > > - for (address = start; address < end; address += PAGE_SIZE) > > - mmio_invalidate(npu_context, 1, address, false); > > + pnv_npu2_invalidate_helper(npu_context, mm, start, end, false); > > > > /* Do the flush only on the final addess == end */ > > - mmio_invalidate(npu_context, 1, address, true); > > + pnv_npu2_invalidate_helper(npu_context, mm, end, end, true); > > } > > > > static const struct mmu_notifier_ops nv_nmmu_notifier_ops = { > > > >
On Wed, Feb 7, 2018 at 2:14 PM, Alistair Popple <alistair@popple.id.au> wrote: > On Tuesday, 16 January 2018 3:15:05 PM AEDT Alistair Popple wrote: >> Thanks Balbir, one question below. I have no way of testing this at present but >> it looks ok to me. Thanks! > > The below are more future optimisations once we can test. So in the meantime: > > Acked-by: Alistair Popple <alistair@popple.id.au> @aneesh can you please look at this? @mpe can we pick this up if there are no objections? Balbir Singh
> @aneesh can you please look at this? @mpe can we pick this up if there > are no objections? @mpe any objections to picking this up for this release? Or do you want to wait for the next one? (there are likely more bugfixes coming for ATS support). - Alistair > Balbir Singh >
Alistair Popple <alistair@popple.id.au> writes: >> @aneesh can you please look at this? @mpe can we pick this up if there >> are no objections? > > @mpe any objections to picking this up for this release? Or do you want to wait > for the next one? (there are likely more bugfixes coming for ATS support). I can grab it for this release if it's fixing a bug, the subject makes it sound like a cleanup. Do we have a Fixes tag for it? I don't particularly like the potentially very long loop in pnv_npu2_invalidate_helper() which spends most of its time with interrupts off. I worry we're going to see soft lockups pointing there if we get a very large invalidate. cheers
On Wed, Feb 28, 2018 at 9:50 PM, Michael Ellerman <mpe@ellerman.id.au> wrote: > Alistair Popple <alistair@popple.id.au> writes: > >>> @aneesh can you please look at this? @mpe can we pick this up if there >>> are no objections? >> >> @mpe any objections to picking this up for this release? Or do you want to wait >> for the next one? (there are likely more bugfixes coming for ATS support). > > I can grab it for this release if it's fixing a bug, the subject makes > it sound like a cleanup. Do we have a Fixes tag for it? > > I don't particularly like the potentially very long loop in > pnv_npu2_invalidate_helper() which spends most of its time with > interrupts off. I worry we're going to see soft lockups pointing there > if we get a very large invalidate. > We have the same loop today in pnv_npu2_mn_invalidate_range(). The bug fix is really the fix to invalidate the right page size. I agree we can optimize this better, in fact if the mmu notifier is made blocking then we can even do cond_resched(). I however agree, the right thing to do is to figure out the right trade-off of invalidate range size to flushing the entire PID Balbir Singh.
diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c index f6cbc1a71472..e8caa3e2019d 100644 --- a/arch/powerpc/platforms/powernv/npu-dma.c +++ b/arch/powerpc/platforms/powernv/npu-dma.c @@ -17,6 +17,7 @@ #include <linux/pci.h> #include <linux/memblock.h> #include <linux/iommu.h> +#include <linux/huge_mm.h> #include <asm/tlb.h> #include <asm/powernv.h> @@ -27,6 +28,7 @@ #include <asm/pnv-pci.h> #include <asm/msi_bitmap.h> #include <asm/opal.h> +#include <asm/pte-walk.h> #include "powernv.h" #include "pci.h" @@ -460,9 +462,6 @@ static int mmio_invalidate_pid(struct npu *npu, unsigned long pid, bool flush) /* PRS set to process-scoped */ launch |= PPC_BIT(13); - /* AP */ - launch |= (u64) mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17); - /* PID */ launch |= pid << PPC_BITLSHIFT(38); @@ -474,7 +473,8 @@ static int mmio_invalidate_pid(struct npu *npu, unsigned long pid, bool flush) } static int mmio_invalidate_va(struct npu *npu, unsigned long va, - unsigned long pid, bool flush) + unsigned long pid, bool flush, + unsigned int shift) { unsigned long launch; @@ -485,9 +485,8 @@ static int mmio_invalidate_va(struct npu *npu, unsigned long va, launch |= PPC_BIT(13); /* AP */ - launch |= (u64) mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17); + launch |= (u64) mmu_get_ap(shift) << PPC_BITLSHIFT(17); - /* PID */ launch |= pid << PPC_BITLSHIFT(38); /* No flush */ @@ -504,7 +503,8 @@ struct mmio_atsd_reg { }; static void mmio_invalidate_wait( - struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], bool flush) + struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], bool flush, + unsigned int shift) { struct npu *npu; int i, reg; @@ -537,7 +537,8 @@ static void mmio_invalidate_wait( * the value of va. */ static void mmio_invalidate(struct npu_context *npu_context, int va, - unsigned long address, bool flush) + unsigned long address, bool flush, + unsigned int shift) { int i, j; struct npu *npu; @@ -572,7 +573,7 @@ static void mmio_invalidate(struct npu_context *npu_context, int va, if (va) mmio_atsd_reg[i].reg = mmio_invalidate_va(npu, address, pid, - flush); + flush, shift); else mmio_atsd_reg[i].reg = mmio_invalidate_pid(npu, pid, flush); @@ -585,10 +586,32 @@ static void mmio_invalidate(struct npu_context *npu_context, int va, } } - mmio_invalidate_wait(mmio_atsd_reg, flush); + mmio_invalidate_wait(mmio_atsd_reg, flush, shift); if (flush) /* Wait for the flush to complete */ - mmio_invalidate_wait(mmio_atsd_reg, false); + mmio_invalidate_wait(mmio_atsd_reg, false, shift); +} + +static void pnv_npu2_invalidate_helper(struct npu_context *npu_context, + struct mm_struct *mm, unsigned long start, + unsigned long end, bool flush) +{ + unsigned long address; + unsigned int hshift = 0, shift; + + address = start; + do { + local_irq_disable(); + find_linux_pte(mm->pgd, address, NULL, &hshift); + if (hshift) + shift = hshift; + else + shift = PAGE_SHIFT; + mmio_invalidate(npu_context, address > 0, address, flush, + shift); + local_irq_enable(); + address += (1ull << shift); + } while (address < end); } static void pnv_npu2_mn_release(struct mmu_notifier *mn, @@ -604,7 +627,7 @@ static void pnv_npu2_mn_release(struct mmu_notifier *mn, * There should be no more translation requests for this PID, but we * need to ensure any entries for it are removed from the TLB. */ - mmio_invalidate(npu_context, 0, 0, true); + pnv_npu2_invalidate_helper(npu_context, mm, 0, PAGE_SIZE, true); } static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn, @@ -614,7 +637,7 @@ static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn, { struct npu_context *npu_context = mn_to_npu_context(mn); - mmio_invalidate(npu_context, 1, address, true); + pnv_npu2_invalidate_helper(npu_context, mm, address, address, true); } static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn, @@ -622,13 +645,11 @@ static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn, unsigned long start, unsigned long end) { struct npu_context *npu_context = mn_to_npu_context(mn); - unsigned long address; - for (address = start; address < end; address += PAGE_SIZE) - mmio_invalidate(npu_context, 1, address, false); + pnv_npu2_invalidate_helper(npu_context, mm, start, end, false); /* Do the flush only on the final addess == end */ - mmio_invalidate(npu_context, 1, address, true); + pnv_npu2_invalidate_helper(npu_context, mm, end, end, true); } static const struct mmu_notifier_ops nv_nmmu_notifier_ops = {
While reviewing the code I found that the flush assumes all pages are of mmu_linear_psize, which is not correct. The patch uses find_linux_pte to find the right page size and uses that for launching the ATSD invalidation. A new helper is added to abstract the invalidation from the various notifiers. The patch also cleans up a bit by removing AP size from PID flushes. Signed-off-by: Balbir Singh <bsingharora@gmail.com> --- Changelog: Refactor the handling of return values from find_linux_pte as suggested by Aneesh arch/powerpc/platforms/powernv/npu-dma.c | 55 ++++++++++++++++++++++---------- 1 file changed, 38 insertions(+), 17 deletions(-)