Message ID | 1412073306-13812-10-git-send-email-mikey@neuling.org (mailing list archive) |
---|---|
State | Changes Requested |
Headers | show |
Michael Neuling <mikey@neuling.org> writes: > From: Ian Munsie <imunsie@au1.ibm.com> > > This adds a new function hash_page_mm() based on the existing hash_page(). > This version allows any struct mm to be passed in, rather than assuming > current. This is useful for servicing co-processor faults which are not in the > context of the current running process. > > We need to be careful here as the current hash_page() assumes current in a few > places. It would be nice to document the rules here. So when we try to add a hash page entry, and if that result in demotion of the segment are we suppose to flush slbs ? Also why would one want to hash anything other than current->mm ? How will this get called ? May be they are explained in later patches. But can we also explain it here. > > Signed-off-by: Ian Munsie <imunsie@au1.ibm.com> > Signed-off-by: Michael Neuling <mikey@neuling.org> > --- > arch/powerpc/include/asm/mmu-hash64.h | 1 + > arch/powerpc/mm/hash_utils_64.c | 22 ++++++++++++++-------- > 2 files changed, 15 insertions(+), 8 deletions(-) > > diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h > index 6d0b7a2..f84e5a5 100644 > --- a/arch/powerpc/include/asm/mmu-hash64.h > +++ b/arch/powerpc/include/asm/mmu-hash64.h > @@ -322,6 +322,7 @@ extern int __hash_page_64K(unsigned long ea, unsigned long access, > unsigned int local, int ssize); > struct mm_struct; > unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap); > +extern int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, unsigned long trap); > extern int hash_page(unsigned long ea, unsigned long access, unsigned long trap); > int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, > pte_t *ptep, unsigned long trap, int local, int ssize, > diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c > index bbdb054..0a5c8c0 100644 > --- a/arch/powerpc/mm/hash_utils_64.c > +++ 
b/arch/powerpc/mm/hash_utils_64.c > @@ -904,7 +904,7 @@ void demote_segment_4k(struct mm_struct *mm, unsigned long addr) > return; > slice_set_range_psize(mm, addr, 1, MMU_PAGE_4K); > copro_flush_all_slbs(mm); > - if (get_paca_psize(addr) != MMU_PAGE_4K) { > + if ((get_paca_psize(addr) != MMU_PAGE_4K) && (current->mm == mm)) { > get_paca()->context = mm->context; > slb_flush_and_rebolt(); > } > @@ -989,26 +989,24 @@ static void check_paca_psize(unsigned long ea, struct mm_struct *mm, > * -1 - critical hash insertion error > * -2 - access not permitted by subpage protection mechanism > */ > -int hash_page(unsigned long ea, unsigned long access, unsigned long trap) > +int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, unsigned long trap) > { > enum ctx_state prev_state = exception_enter(); > pgd_t *pgdir; > unsigned long vsid; > - struct mm_struct *mm; > pte_t *ptep; > unsigned hugeshift; > const struct cpumask *tmp; > int rc, user_region = 0, local = 0; > int psize, ssize; > > - DBG_LOW("hash_page(ea=%016lx, access=%lx, trap=%lx\n", > - ea, access, trap); > + DBG_LOW("%s(ea=%016lx, access=%lx, trap=%lx\n", > + __func__, ea, access, trap); > > /* Get region & vsid */ > switch (REGION_ID(ea)) { > case USER_REGION_ID: > user_region = 1; > - mm = current->mm; > if (! 
mm) { > DBG_LOW(" user region with no mm !\n"); > rc = 1; > @@ -1104,7 +1102,8 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) > WARN_ON(1); > } > #endif > - check_paca_psize(ea, mm, psize, user_region); > + if (current->mm == mm) > + check_paca_psize(ea, mm, psize, user_region); > > goto bail; > } > @@ -1145,7 +1144,8 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) > } > } > > - check_paca_psize(ea, mm, psize, user_region); > + if (current->mm == mm) > + check_paca_psize(ea, mm, psize, user_region); > #endif /* CONFIG_PPC_64K_PAGES */ > > #ifdef CONFIG_PPC_HAS_HASH_64K > @@ -1180,6 +1180,12 @@ bail: > exception_exit(prev_state); > return rc; > } > +EXPORT_SYMBOL_GPL(hash_page_mm); > + > +int hash_page(unsigned long ea, unsigned long access, unsigned long trap) > +{ > + return hash_page_mm(current->mm, ea, access, trap); > +} > EXPORT_SYMBOL_GPL(hash_page); > > void hash_preload(struct mm_struct *mm, unsigned long ea, > -- > 1.9.1 > > -- > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html > Please read the FAQ at http://www.tux.org/lkml/
On Tue, 2014-30-09 at 10:34:58 UTC, Michael Neuling wrote: > From: Ian Munsie <imunsie@au1.ibm.com> > > This adds a new function hash_page_mm() based on the existing hash_page(). > This version allows any struct mm to be passed in, rather than assuming > current. This is useful for servicing co-processor faults which are not in the > context of the current running process. I'm not a big fan. hash_page() is already a train wreck, and this doesn't make it any better. > diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c > index bbdb054..0a5c8c0 100644 > --- a/arch/powerpc/mm/hash_utils_64.c > +++ b/arch/powerpc/mm/hash_utils_64.c > @@ -904,7 +904,7 @@ void demote_segment_4k(struct mm_struct *mm, unsigned long addr) > return; > slice_set_range_psize(mm, addr, 1, MMU_PAGE_4K); > copro_flush_all_slbs(mm); > - if (get_paca_psize(addr) != MMU_PAGE_4K) { > + if ((get_paca_psize(addr) != MMU_PAGE_4K) && (current->mm == mm)) { > get_paca()->context = mm->context; > slb_flush_and_rebolt(); This is a bit fishy. If that mm is currently running on another cpu you just failed to update its paca. But I think the call to check_paca_psize() in hash_page() will save you on that cpu. In fact we might be able to remove that synchronisation from demote_segment_4k() and always leave it up to check_paca_psize()?
> @@ -989,26 +989,24 @@ static void check_paca_psize(unsigned long ea, struct mm_struct *mm, > * -1 - critical hash insertion error > * -2 - access not permitted by subpage protection mechanism > */ > -int hash_page(unsigned long ea, unsigned long access, unsigned long trap) > +int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, unsigned long trap) > { > enum ctx_state prev_state = exception_enter(); > pgd_t *pgdir; > unsigned long vsid; > - struct mm_struct *mm; > pte_t *ptep; > unsigned hugeshift; > const struct cpumask *tmp; > int rc, user_region = 0, local = 0; > int psize, ssize; > > - DBG_LOW("hash_page(ea=%016lx, access=%lx, trap=%lx\n", > - ea, access, trap); > + DBG_LOW("%s(ea=%016lx, access=%lx, trap=%lx\n", > + __func__, ea, access, trap); > > /* Get region & vsid */ > switch (REGION_ID(ea)) { > case USER_REGION_ID: > user_region = 1; > - mm = current->mm; > if (! mm) { > DBG_LOW(" user region with no mm !\n"); > rc = 1; What about the VMALLOC case where we do: mm = &init_mm; Is that what you want? It seems odd that you pass an mm to the routine, but then potentially it ends up using a different mm after all depending on the address. cheers
On Wed, 2014-10-01 at 15:13 +0530, Aneesh Kumar K.V wrote: > Michael Neuling <mikey@neuling.org> writes: > > > From: Ian Munsie <imunsie@au1.ibm.com> > > > > This adds a new function hash_page_mm() based on the existing hash_page(). > > This version allows any struct mm to be passed in, rather than assuming > > current. This is useful for servicing co-processor faults which are not in the > > context of the current running process. > > > > We need to be careful here as the current hash_page() assumes current in a few > > places. > > It would be nice to document the rules here. So when we try to add a hash > page entry, and if that result in demotion of the segment are we suppose to > flush slbs ? Yeah, we found it sucky to understand. The current documentation is "buy benh a beer and ask him" which doesn't scale very well unless you're benh and you like beer. > Also why would one want to hash anything other > than current->mm ? How will this get called ? We are calling this on behalf of a co-processor (eg cxl). The mm this is currently associated with may not be running on a cpu. > May be they are explained in later patches. But can we also explain it > here. Ok, I'll add something (mpe had the same question). 
Mikey > > > > > Signed-off-by: Ian Munsie <imunsie@au1.ibm.com> > > Signed-off-by: Michael Neuling <mikey@neuling.org> > > --- > > arch/powerpc/include/asm/mmu-hash64.h | 1 + > > arch/powerpc/mm/hash_utils_64.c | 22 ++++++++++++++-------- > > 2 files changed, 15 insertions(+), 8 deletions(-) > > > > diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h > > index 6d0b7a2..f84e5a5 100644 > > --- a/arch/powerpc/include/asm/mmu-hash64.h > > +++ b/arch/powerpc/include/asm/mmu-hash64.h > > @@ -322,6 +322,7 @@ extern int __hash_page_64K(unsigned long ea, unsigned long access, > > unsigned int local, int ssize); > > struct mm_struct; > > unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap); > > +extern int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, unsigned long trap); > > extern int hash_page(unsigned long ea, unsigned long access, unsigned long trap); > > int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, > > pte_t *ptep, unsigned long trap, int local, int ssize, > > diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c > > index bbdb054..0a5c8c0 100644 > > --- a/arch/powerpc/mm/hash_utils_64.c > > +++ b/arch/powerpc/mm/hash_utils_64.c > > @@ -904,7 +904,7 @@ void demote_segment_4k(struct mm_struct *mm, unsigned long addr) > > return; > > slice_set_range_psize(mm, addr, 1, MMU_PAGE_4K); > > copro_flush_all_slbs(mm); > > - if (get_paca_psize(addr) != MMU_PAGE_4K) { > > + if ((get_paca_psize(addr) != MMU_PAGE_4K) && (current->mm == mm)) { > > get_paca()->context = mm->context; > > slb_flush_and_rebolt(); > > } > > @@ -989,26 +989,24 @@ static void check_paca_psize(unsigned long ea, struct mm_struct *mm, > > * -1 - critical hash insertion error > > * -2 - access not permitted by subpage protection mechanism > > */ > > -int hash_page(unsigned long ea, unsigned long access, unsigned long trap) > > +int hash_page_mm(struct mm_struct 
*mm, unsigned long ea, unsigned long access, unsigned long trap) > > { > > enum ctx_state prev_state = exception_enter(); > > pgd_t *pgdir; > > unsigned long vsid; > > - struct mm_struct *mm; > > pte_t *ptep; > > unsigned hugeshift; > > const struct cpumask *tmp; > > int rc, user_region = 0, local = 0; > > int psize, ssize; > > > > - DBG_LOW("hash_page(ea=%016lx, access=%lx, trap=%lx\n", > > - ea, access, trap); > > + DBG_LOW("%s(ea=%016lx, access=%lx, trap=%lx\n", > > + __func__, ea, access, trap); > > > > /* Get region & vsid */ > > switch (REGION_ID(ea)) { > > case USER_REGION_ID: > > user_region = 1; > > - mm = current->mm; > > if (! mm) { > > DBG_LOW(" user region with no mm !\n"); > > rc = 1; > > @@ -1104,7 +1102,8 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) > > WARN_ON(1); > > } > > #endif > > - check_paca_psize(ea, mm, psize, user_region); > > + if (current->mm == mm) > > + check_paca_psize(ea, mm, psize, user_region); > > > > goto bail; > > } > > @@ -1145,7 +1144,8 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) > > } > > } > > > > - check_paca_psize(ea, mm, psize, user_region); > > + if (current->mm == mm) > > + check_paca_psize(ea, mm, psize, user_region); > > #endif /* CONFIG_PPC_64K_PAGES */ > > > > #ifdef CONFIG_PPC_HAS_HASH_64K > > @@ -1180,6 +1180,12 @@ bail: > > exception_exit(prev_state); > > return rc; > > } > > +EXPORT_SYMBOL_GPL(hash_page_mm); > > + > > +int hash_page(unsigned long ea, unsigned long access, unsigned long trap) > > +{ > > + return hash_page_mm(current->mm, ea, access, trap); > > +} > > EXPORT_SYMBOL_GPL(hash_page); > > > > void hash_preload(struct mm_struct *mm, unsigned long ea, > > -- > > 1.9.1 > > > > -- > > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in > > the body of a message to majordomo@vger.kernel.org > > More majordomo info at http://vger.kernel.org/majordomo-info.html > > Please read the FAQ at http://www.tux.org/lkml/ >
On Thu, 2014-10-02 at 13:48 +1000, Michael Ellerman wrote: > On Tue, 2014-30-09 at 10:34:58 UTC, Michael Neuling wrote: > > From: Ian Munsie <imunsie@au1.ibm.com> > > > > This adds a new function hash_page_mm() based on the existing hash_page(). > > This version allows any struct mm to be passed in, rather than assuming > > current. This is useful for servicing co-processor faults which are not in the > > context of the current running process. > > I'm not a big fan. hash_page() is already a train wreck, and this doesn't make > it any better. I can document it to make the situation a bit better. It's certainly not clear which one to use here and under what circumstances. It's basically ask benh territory. > > diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c > > index bbdb054..0a5c8c0 100644 > > --- a/arch/powerpc/mm/hash_utils_64.c > > +++ b/arch/powerpc/mm/hash_utils_64.c > > @@ -904,7 +904,7 @@ void demote_segment_4k(struct mm_struct *mm, unsigned long addr) > > return; > > slice_set_range_psize(mm, addr, 1, MMU_PAGE_4K); > > copro_flush_all_slbs(mm); > > - if (get_paca_psize(addr) != MMU_PAGE_4K) { > > + if ((get_paca_psize(addr) != MMU_PAGE_4K) && (current->mm == mm)) { > > get_paca()->context = mm->context; > > slb_flush_and_rebolt(); > > This is a bit fishy. > > If that mm is currently running on another cpu you just failed to update it's > paca. But I think the call to check_paca_psize() in hash_page() will save you > on that cpu. > > In fact we might be able to remove that synchronisation from > demote_segment_4k() and always leave it up to check_paca_psize()? Aneesh asked the same thing for v1 and we convinced ourselves it was ok. I said this at the time... I had a chat to benh offline about this and he thinks it's fine. A running process in the same mm context will either have hit this mapping or not. 
If it's hit it, the page will be invalidated and it'll come in via hash_page and have its segment demoted also (and paca updated). If it hasn't hit, again it'll come into hash_page() and get demoted also. > > @@ -989,26 +989,24 @@ static void check_paca_psize(unsigned long ea, struct mm_struct *mm, > > * -1 - critical hash insertion error > > * -2 - access not permitted by subpage protection mechanism > > */ > > -int hash_page(unsigned long ea, unsigned long access, unsigned long trap) > > +int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, unsigned long trap) > > { > > enum ctx_state prev_state = exception_enter(); > > pgd_t *pgdir; > > unsigned long vsid; > > - struct mm_struct *mm; > > pte_t *ptep; > > unsigned hugeshift; > > const struct cpumask *tmp; > > int rc, user_region = 0, local = 0; > > int psize, ssize; > > > > - DBG_LOW("hash_page(ea=%016lx, access=%lx, trap=%lx\n", > > - ea, access, trap); > > + DBG_LOW("%s(ea=%016lx, access=%lx, trap=%lx\n", > > + __func__, ea, access, trap); > > > > /* Get region & vsid */ > > switch (REGION_ID(ea)) { > > case USER_REGION_ID: > > user_region = 1; > > - mm = current->mm; > > if (! mm) { > > DBG_LOW(" user region with no mm !\n"); > > rc = 1; > > What about the VMALLOC case where we do: > mm = &init_mm; > > Is that what you want? It seems odd that you pass an mm to the routine, but > then potentially it ends up using a different mm after all depending on the > address. Good point. We have hash_page() still. I can make that check in there and decide which mm to use and pass that to hash_page_mm(). Then we always use mm in hash_page_mm(). hash_page() will then look like this: int hash_page(unsigned long ea, unsigned long access, unsigned long trap) { struct mm_struct *mm = current->mm; if (REGION_ID(ea) == VMALLOC_REGION_ID) mm = &init_mm; return hash_page_mm(mm, ea, access, trap); } Mikey
diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h index 6d0b7a2..f84e5a5 100644 --- a/arch/powerpc/include/asm/mmu-hash64.h +++ b/arch/powerpc/include/asm/mmu-hash64.h @@ -322,6 +322,7 @@ extern int __hash_page_64K(unsigned long ea, unsigned long access, unsigned int local, int ssize); struct mm_struct; unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap); +extern int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, unsigned long trap); extern int hash_page(unsigned long ea, unsigned long access, unsigned long trap); int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, pte_t *ptep, unsigned long trap, int local, int ssize, diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index bbdb054..0a5c8c0 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -904,7 +904,7 @@ void demote_segment_4k(struct mm_struct *mm, unsigned long addr) return; slice_set_range_psize(mm, addr, 1, MMU_PAGE_4K); copro_flush_all_slbs(mm); - if (get_paca_psize(addr) != MMU_PAGE_4K) { + if ((get_paca_psize(addr) != MMU_PAGE_4K) && (current->mm == mm)) { get_paca()->context = mm->context; slb_flush_and_rebolt(); } @@ -989,26 +989,24 @@ static void check_paca_psize(unsigned long ea, struct mm_struct *mm, * -1 - critical hash insertion error * -2 - access not permitted by subpage protection mechanism */ -int hash_page(unsigned long ea, unsigned long access, unsigned long trap) +int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, unsigned long trap) { enum ctx_state prev_state = exception_enter(); pgd_t *pgdir; unsigned long vsid; - struct mm_struct *mm; pte_t *ptep; unsigned hugeshift; const struct cpumask *tmp; int rc, user_region = 0, local = 0; int psize, ssize; - DBG_LOW("hash_page(ea=%016lx, access=%lx, trap=%lx\n", - ea, access, trap); + DBG_LOW("%s(ea=%016lx, access=%lx, trap=%lx\n", + 
__func__, ea, access, trap); /* Get region & vsid */ switch (REGION_ID(ea)) { case USER_REGION_ID: user_region = 1; - mm = current->mm; if (! mm) { DBG_LOW(" user region with no mm !\n"); rc = 1; @@ -1104,7 +1102,8 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) WARN_ON(1); } #endif - check_paca_psize(ea, mm, psize, user_region); + if (current->mm == mm) + check_paca_psize(ea, mm, psize, user_region); goto bail; } @@ -1145,7 +1144,8 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) } } - check_paca_psize(ea, mm, psize, user_region); + if (current->mm == mm) + check_paca_psize(ea, mm, psize, user_region); #endif /* CONFIG_PPC_64K_PAGES */ #ifdef CONFIG_PPC_HAS_HASH_64K @@ -1180,6 +1180,12 @@ bail: exception_exit(prev_state); return rc; } +EXPORT_SYMBOL_GPL(hash_page_mm); + +int hash_page(unsigned long ea, unsigned long access, unsigned long trap) +{ + return hash_page_mm(current->mm, ea, access, trap); +} EXPORT_SYMBOL_GPL(hash_page); void hash_preload(struct mm_struct *mm, unsigned long ea,