Message ID | 20180306132507.10649-7-npiggin@gmail.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | powerpc/mm/slice: improve slice speed and stack use | expand |
Le 06/03/2018 à 14:25, Nicholas Piggin a écrit : > Rather than build slice masks from a range then use that to check for > fit in a candidate mask, implement slice_check_range_fits that checks > if a range fits in a mask directly. > > This allows several structures to be removed from stacks, and also we > don't expect a huge range in a lot of these cases, so building and > comparing a full mask is going to be more expensive than testing just > one or two bits of the range. > > On POWER8, this increases vfork+exec+exit performance by 0.3% > and reduces time to mmap+munmap a 64kB page by 5%. > > Signed-off-by: Nicholas Piggin <npiggin@gmail.com> > --- > arch/powerpc/mm/slice.c | 63 ++++++++++++++++++++++++++----------------------- > 1 file changed, 34 insertions(+), 29 deletions(-) > > diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c > index 2115efe5e869..3841fca75006 100644 > --- a/arch/powerpc/mm/slice.c > +++ b/arch/powerpc/mm/slice.c > @@ -179,26 +179,35 @@ static struct slice_mask *slice_mask_for_size(struct mm_struct *mm, int psize) > #error "Must define the slice masks for page sizes supported by the platform" > #endif > > -static int slice_check_fit(struct mm_struct *mm, > - const struct slice_mask *mask, > - const struct slice_mask *available) > +static bool slice_check_range_fits(struct mm_struct *mm, > + const struct slice_mask *available, > + unsigned long start, unsigned long len) > { > - DECLARE_BITMAP(result, SLICE_NUM_HIGH); > - /* > - * Make sure we just do bit compare only to the max > - * addr limit and not the full bit map size. > - */ > - unsigned long slice_count = GET_HIGH_SLICE_INDEX(mm->context.slb_addr_limit); > + unsigned long end = start + len - 1; > + u64 low_slices = 0; > > - if (!SLICE_NUM_HIGH) > - return (mask->low_slices & available->low_slices) == > - mask->low_slices; > + if (start < SLICE_LOW_TOP) { > + unsigned long mend = min(end, (SLICE_LOW_TOP - 1)); See slice_range_to_mask() You'll have an issue here with PPC32, you have to cast (SLICE_LOW_TOP - 1) to unsigned long because SLICE_LOW_TOP is unsigned long long on PPC32 > + > + low_slices = (1u << (GET_LOW_SLICE_INDEX(mend) + 1)) > + - (1u << GET_LOW_SLICE_INDEX(start)); > + } > + if ((low_slices & available->low_slices) != low_slices) > + return false; > + > + if (SLICE_NUM_HIGH && ((start + len) > SLICE_LOW_TOP)) { > + unsigned long start_index = GET_HIGH_SLICE_INDEX(start); > + unsigned long align_end = ALIGN(end, (1UL << SLICE_HIGH_SHIFT)); > + unsigned long count = GET_HIGH_SLICE_INDEX(align_end) - start_index; > + unsigned long i; > > - bitmap_and(result, mask->high_slices, > - available->high_slices, slice_count); > + for (i = start_index; i < start_index + count; i++) { > + if (!test_bit(i, available->high_slices)) > + return false; > + } What about using bitmap_find_next_zero_area() > + } > > - return (mask->low_slices & available->low_slices) == mask->low_slices && > - bitmap_equal(result, mask->high_slices, slice_count); > + return true; > } > > static void slice_flush_segments(void *parm) > @@ -562,15 +571,11 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, > #endif > > /* First check hint if it's valid or if we have MAP_FIXED */ > - if (addr != 0 || fixed) { > - /* Build a mask for the requested range */ > - slice_range_to_mask(addr, len, &mask); > - slice_print_mask(" mask", &mask); > - > + if (addr || fixed) { It is cleanup, should it really be part of this patch ? > /* Check if we fit in the good mask. If we do, we just return, > * nothing else to do > */ > - if (slice_check_fit(mm, &mask, &good_mask)) { > + if (slice_check_range_fits(mm, &good_mask, addr, len)) { > slice_dbg(" fits good !\n"); > return addr; > } > @@ -596,10 +601,11 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, > slice_or_mask(&potential_mask, &good_mask); > slice_print_mask(" potential", &potential_mask); > > - if ((addr != 0 || fixed) && > - slice_check_fit(mm, &mask, &potential_mask)) { > - slice_dbg(" fits potential !\n"); > - goto convert; > + if (addr || fixed) { > + if (slice_check_range_fits(mm, &potential_mask, addr, len)) { > + slice_dbg(" fits potential !\n"); > + goto convert; > + } Why not keep the original structure and just replacing slice_check_fit() by slice_check_range_fits() ? I believe cleanups should not be mixed with real feature changes. If needed, you should have a cleanup patch up front the serie. Christophe > } > > /* If we have MAP_FIXED and failed the above steps, then error out */ > @@ -772,13 +778,12 @@ void slice_set_range_psize(struct mm_struct *mm, unsigned long start, > int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr, > unsigned long len) > { > - struct slice_mask mask, available; > + struct slice_mask available; > unsigned int psize = mm->context.user_psize; > > if (radix_enabled()) > return 0; > > - slice_range_to_mask(addr, len, &mask); > available = *slice_mask_for_size(mm, psize); > #ifdef CONFIG_PPC_64K_PAGES > /* We need to account for 4k slices too */ > @@ -795,6 +800,6 @@ int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr, > slice_print_mask(" mask", &mask); > slice_print_mask(" available", &available); > #endif > - return !slice_check_fit(mm, &mask, &available); > + return !slice_check_range_fits(mm, &available, addr, len); > } > #endif >
On Tue, 6 Mar 2018 15:41:00 +0100 Christophe LEROY <christophe.leroy@c-s.fr> wrote: > Le 06/03/2018 à 14:25, Nicholas Piggin a écrit : > > +static bool slice_check_range_fits(struct mm_struct *mm, > > + const struct slice_mask *available, > > + unsigned long start, unsigned long len) > > { > > - DECLARE_BITMAP(result, SLICE_NUM_HIGH); > > - /* > > - * Make sure we just do bit compare only to the max > > - * addr limit and not the full bit map size. > > - */ > > - unsigned long slice_count = GET_HIGH_SLICE_INDEX(mm->context.slb_addr_limit); > > + unsigned long end = start + len - 1; > > + u64 low_slices = 0; > > > > - if (!SLICE_NUM_HIGH) > > - return (mask->low_slices & available->low_slices) == > > - mask->low_slices; > > + if (start < SLICE_LOW_TOP) { > > + unsigned long mend = min(end, (SLICE_LOW_TOP - 1)); > > See slice_range_to_mask() > > You'll have an issue here with PPC32, you have to cast (SLICE_LOW_TOP - > 1) to unsigned long because SLICE_LOW_TOP is unsigned long long on PPC32 Okay thanks. Forgot to cross compiled it on 8xx, so I'll do that next time. > > + > > + low_slices = (1u << (GET_LOW_SLICE_INDEX(mend) + 1)) > > + - (1u << GET_LOW_SLICE_INDEX(start)); > > + } > > + if ((low_slices & available->low_slices) != low_slices) > > + return false; > > + > > + if (SLICE_NUM_HIGH && ((start + len) > SLICE_LOW_TOP)) { > > + unsigned long start_index = GET_HIGH_SLICE_INDEX(start); > > + unsigned long align_end = ALIGN(end, (1UL << SLICE_HIGH_SHIFT)); > > + unsigned long count = GET_HIGH_SLICE_INDEX(align_end) - start_index; > > + unsigned long i; > > > > - bitmap_and(result, mask->high_slices, > > - available->high_slices, slice_count); > > + for (i = start_index; i < start_index + count; i++) { > > + if (!test_bit(i, available->high_slices)) > > + return false; > > + } > > What about using bitmap_find_next_zero_area() I'll look at it. Perhaps in another patch, because existing loops are not using bitmap range operations either. A series to convert those is a good idea. > > @@ -562,15 +571,11 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, > > #endif > > > > /* First check hint if it's valid or if we have MAP_FIXED */ > > - if (addr != 0 || fixed) { > > - /* Build a mask for the requested range */ > > - slice_range_to_mask(addr, len, &mask); > > - slice_print_mask(" mask", &mask); > > - > > + if (addr || fixed) { > > It is cleanup, should it really be part of this patch ? > > @@ -596,10 +601,11 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, > > slice_or_mask(&potential_mask, &good_mask); > > slice_print_mask(" potential", &potential_mask); > > > > - if ((addr != 0 || fixed) && > > - slice_check_fit(mm, &mask, &potential_mask)) { > > - slice_dbg(" fits potential !\n"); > > - goto convert; > > + if (addr || fixed) { > > + if (slice_check_range_fits(mm, &potential_mask, addr, len)) { > > + slice_dbg(" fits potential !\n"); > > + goto convert; > > + } > > Why not keep the original structure and just replacing slice_check_fit() > by slice_check_range_fits() ? > > I believe cleanups should not be mixed with real feature changes. If > needed, you should have a cleanup patch up front the serie. For code that is already changing, I think minor cleanups are okay if they're very simple. Maybe this is getting to the point of needing another patch. You've made valid points for a lot of other unnecessary cleanups though, so I'll fix all of those. Thanks, Nick
Le 07/03/2018 à 00:12, Nicholas Piggin a écrit : > On Tue, 6 Mar 2018 15:41:00 +0100 > Christophe LEROY <christophe.leroy@c-s.fr> wrote: > >> Le 06/03/2018 à 14:25, Nicholas Piggin a écrit : > > >>> +static bool slice_check_range_fits(struct mm_struct *mm, >>> + const struct slice_mask *available, >>> + unsigned long start, unsigned long len) >>> { >>> - DECLARE_BITMAP(result, SLICE_NUM_HIGH); >>> - /* >>> - * Make sure we just do bit compare only to the max >>> - * addr limit and not the full bit map size. >>> - */ >>> - unsigned long slice_count = GET_HIGH_SLICE_INDEX(mm->context.slb_addr_limit); >>> + unsigned long end = start + len - 1; >>> + u64 low_slices = 0; >>> >>> - if (!SLICE_NUM_HIGH) >>> - return (mask->low_slices & available->low_slices) == >>> - mask->low_slices; >>> + if (start < SLICE_LOW_TOP) { >>> + unsigned long mend = min(end, (SLICE_LOW_TOP - 1)); >> >> See slice_range_to_mask() >> >> You'll have an issue here with PPC32, you have to cast (SLICE_LOW_TOP - >> 1) to unsigned long because SLICE_LOW_TOP is unsigned long long on PPC32 > > Okay thanks. Forgot to cross compiled it on 8xx, so I'll do that next > time. > >>> + >>> + low_slices = (1u << (GET_LOW_SLICE_INDEX(mend) + 1)) >>> + - (1u << GET_LOW_SLICE_INDEX(start)); >>> + } >>> + if ((low_slices & available->low_slices) != low_slices) >>> + return false; >>> + >>> + if (SLICE_NUM_HIGH && ((start + len) > SLICE_LOW_TOP)) { >>> + unsigned long start_index = GET_HIGH_SLICE_INDEX(start); >>> + unsigned long align_end = ALIGN(end, (1UL << SLICE_HIGH_SHIFT)); >>> + unsigned long count = GET_HIGH_SLICE_INDEX(align_end) - start_index; >>> + unsigned long i; >>> >>> - bitmap_and(result, mask->high_slices, >>> - available->high_slices, slice_count); >>> + for (i = start_index; i < start_index + count; i++) { >>> + if (!test_bit(i, available->high_slices)) >>> + return false; >>> + } >> >> What about using bitmap_find_next_zero_area() > > I'll look at it. Perhaps in another patch, because existing > loops are not using bitmap range operations either. A series > to convert those is a good idea. > >>> @@ -562,15 +571,11 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, >>> #endif >>> >>> /* First check hint if it's valid or if we have MAP_FIXED */ >>> - if (addr != 0 || fixed) { >>> - /* Build a mask for the requested range */ >>> - slice_range_to_mask(addr, len, &mask); >>> - slice_print_mask(" mask", &mask); >>> - >>> + if (addr || fixed) { >> >> It is cleanup, should it really be part of this patch ? > > > >>> @@ -596,10 +601,11 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, >>> slice_or_mask(&potential_mask, &good_mask); >>> slice_print_mask(" potential", &potential_mask); >>> >>> - if ((addr != 0 || fixed) && >>> - slice_check_fit(mm, &mask, &potential_mask)) { >>> - slice_dbg(" fits potential !\n"); >>> - goto convert; >>> + if (addr || fixed) { >>> + if (slice_check_range_fits(mm, &potential_mask, addr, len)) { >>> + slice_dbg(" fits potential !\n"); >>> + goto convert; >>> + } >> >> Why not keep the original structure and just replacing slice_check_fit() >> by slice_check_range_fits() ? >> >> I believe cleanups should not be mixed with real feature changes. If >> needed, you should have a cleanup patch up front the serie. > > For code that is already changing, I think minor cleanups are okay if > they're very simple. Maybe this is getting to the point of needing > another patch. You've made valid points for a lot of other unnecessary > cleanups though, so I'll fix all of those. Ok, that's not a big point, but I like when patches really modifies only the lines they need to modify. Why do we need a double if ? Why not just the following ? With proper alignment of the second line with the open parenthese, it fits in one line if ((addr != 0 || fixed) && - slice_check_fit(mm, &mask, &potential_mask)) { + slice_check_range_fits(mm, &potential_mask, addr, len)) { slice_dbg(" fits potential !\n"); goto convert; } Christophe
On Wed, 7 Mar 2018 07:12:23 +0100 Christophe LEROY <christophe.leroy@c-s.fr> wrote: > Le 07/03/2018 à 00:12, Nicholas Piggin a écrit : > > On Tue, 6 Mar 2018 15:41:00 +0100 > > Christophe LEROY <christophe.leroy@c-s.fr> wrote: > > > >> Le 06/03/2018 à 14:25, Nicholas Piggin a écrit : > >>> @@ -596,10 +601,11 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, > >>> slice_or_mask(&potential_mask, &good_mask); > >>> slice_print_mask(" potential", &potential_mask); > >>> > >>> - if ((addr != 0 || fixed) && > >>> - slice_check_fit(mm, &mask, &potential_mask)) { > >>> - slice_dbg(" fits potential !\n"); > >>> - goto convert; > >>> + if (addr || fixed) { > >>> + if (slice_check_range_fits(mm, &potential_mask, addr, len)) { > >>> + slice_dbg(" fits potential !\n"); > >>> + goto convert; > >>> + } > >> > >> Why not keep the original structure and just replacing slice_check_fit() > >> by slice_check_range_fits() ? > >> > >> I believe cleanups should not be mixed with real feature changes. If > >> needed, you should have a cleanup patch up front the serie. > > > > For code that is already changing, I think minor cleanups are okay if > > they're very simple. Maybe this is getting to the point of needing > > another patch. You've made valid points for a lot of other unnecessary > > cleanups though, so I'll fix all of those. > > Ok, that's not a big point, but I like when patches really modifies > only the lines they need to modify. Fair point, and in the end I agree mostly they should do that. But I don't think entirely if you can make the code slightly better as you go (again, so long as the change is obvious). I think having extra patches for trivial cleanups is not that great either. > Why do we need a double if ? > > Why not just the following ? With proper alignment of the second line > with the open parenthese, it fits in one line > > if ((addr != 0 || fixed) && > - slice_check_fit(mm, &mask, &potential_mask)) { > + slice_check_range_fits(mm, &potential_mask, addr, len)) { > slice_dbg(" fits potential !\n"); > goto convert; For this case the main motivation was to make this test match the form of the same test (with different mask) above here. Doing the same thing with different coding styles annoys me. I think I kept this one but fixed all your other suggestions in the v2 series. Thanks, Nick
Le 07/03/2018 à 08:16, Nicholas Piggin a écrit : > On Wed, 7 Mar 2018 07:12:23 +0100 > Christophe LEROY <christophe.leroy@c-s.fr> wrote: > >> Le 07/03/2018 à 00:12, Nicholas Piggin a écrit : >>> On Tue, 6 Mar 2018 15:41:00 +0100 >>> Christophe LEROY <christophe.leroy@c-s.fr> wrote: >>> >>>> Le 06/03/2018 à 14:25, Nicholas Piggin a écrit : > >>>>> @@ -596,10 +601,11 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, >>>>> slice_or_mask(&potential_mask, &good_mask); >>>>> slice_print_mask(" potential", &potential_mask); >>>>> >>>>> - if ((addr != 0 || fixed) && >>>>> - slice_check_fit(mm, &mask, &potential_mask)) { >>>>> - slice_dbg(" fits potential !\n"); >>>>> - goto convert; >>>>> + if (addr || fixed) { >>>>> + if (slice_check_range_fits(mm, &potential_mask, addr, len)) { >>>>> + slice_dbg(" fits potential !\n"); >>>>> + goto convert; >>>>> + } >>>> >>>> Why not keep the original structure and just replacing slice_check_fit() >>>> by slice_check_range_fits() ? >>>> >>>> I believe cleanups should not be mixed with real feature changes. If >>>> needed, you should have a cleanup patch up front the serie. >>> >>> For code that is already changing, I think minor cleanups are okay if >>> they're very simple. Maybe this is getting to the point of needing >>> another patch. You've made valid points for a lot of other unnecessary >>> cleanups though, so I'll fix all of those. >> >> Ok, that's not a big point, but I like when patches really modifies >> only the lines they need to modify. > > Fair point, and in the end I agree mostly they should do that. But I > don't think entirely if you can make the code slightly better as you > go (again, so long as the change is obvious). I think having extra > patches for trivial cleanups is not that great either. > >> Why do we need a double if ? >> >> Why not just the following ? With proper alignment of the second line >> with the open parenthese, it fits in one line >> >> if ((addr != 0 || fixed) && >> - slice_check_fit(mm, &mask, &potential_mask)) { >> + slice_check_range_fits(mm, &potential_mask, addr, len)) { >> slice_dbg(" fits potential !\n"); >> goto convert; > > For this case the main motivation was to make this test match the > form of the same test (with different mask) above here. Doing the > same thing with different coding styles annoys me. Yes good point. Christophe > > I think I kept this one but fixed all your other suggestions in > the v2 series. > > Thanks, > Nick >
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c index 2115efe5e869..3841fca75006 100644 --- a/arch/powerpc/mm/slice.c +++ b/arch/powerpc/mm/slice.c @@ -179,26 +179,35 @@ static struct slice_mask *slice_mask_for_size(struct mm_struct *mm, int psize) #error "Must define the slice masks for page sizes supported by the platform" #endif -static int slice_check_fit(struct mm_struct *mm, - const struct slice_mask *mask, - const struct slice_mask *available) +static bool slice_check_range_fits(struct mm_struct *mm, + const struct slice_mask *available, + unsigned long start, unsigned long len) { - DECLARE_BITMAP(result, SLICE_NUM_HIGH); - /* - * Make sure we just do bit compare only to the max - * addr limit and not the full bit map size. - */ - unsigned long slice_count = GET_HIGH_SLICE_INDEX(mm->context.slb_addr_limit); + unsigned long end = start + len - 1; + u64 low_slices = 0; - if (!SLICE_NUM_HIGH) - return (mask->low_slices & available->low_slices) == - mask->low_slices; + if (start < SLICE_LOW_TOP) { + unsigned long mend = min(end, (SLICE_LOW_TOP - 1)); + + low_slices = (1u << (GET_LOW_SLICE_INDEX(mend) + 1)) + - (1u << GET_LOW_SLICE_INDEX(start)); + } + if ((low_slices & available->low_slices) != low_slices) + return false; + + if (SLICE_NUM_HIGH && ((start + len) > SLICE_LOW_TOP)) { + unsigned long start_index = GET_HIGH_SLICE_INDEX(start); + unsigned long align_end = ALIGN(end, (1UL << SLICE_HIGH_SHIFT)); + unsigned long count = GET_HIGH_SLICE_INDEX(align_end) - start_index; + unsigned long i; - bitmap_and(result, mask->high_slices, - available->high_slices, slice_count); + for (i = start_index; i < start_index + count; i++) { + if (!test_bit(i, available->high_slices)) + return false; + } + } - return (mask->low_slices & available->low_slices) == mask->low_slices && - bitmap_equal(result, mask->high_slices, slice_count); + return true; } static void slice_flush_segments(void *parm) @@ -562,15 +571,11 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, #endif /* First check hint if it's valid or if we have MAP_FIXED */ - if (addr != 0 || fixed) { - /* Build a mask for the requested range */ - slice_range_to_mask(addr, len, &mask); - slice_print_mask(" mask", &mask); - + if (addr || fixed) { /* Check if we fit in the good mask. If we do, we just return, * nothing else to do */ - if (slice_check_fit(mm, &mask, &good_mask)) { + if (slice_check_range_fits(mm, &good_mask, addr, len)) { slice_dbg(" fits good !\n"); return addr; } @@ -596,10 +601,11 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, slice_or_mask(&potential_mask, &good_mask); slice_print_mask(" potential", &potential_mask); - if ((addr != 0 || fixed) && - slice_check_fit(mm, &mask, &potential_mask)) { - slice_dbg(" fits potential !\n"); - goto convert; + if (addr || fixed) { + if (slice_check_range_fits(mm, &potential_mask, addr, len)) { + slice_dbg(" fits potential !\n"); + goto convert; + } } /* If we have MAP_FIXED and failed the above steps, then error out */ @@ -772,13 +778,12 @@ void slice_set_range_psize(struct mm_struct *mm, unsigned long start, int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr, unsigned long len) { - struct slice_mask mask, available; + struct slice_mask available; unsigned int psize = mm->context.user_psize; if (radix_enabled()) return 0; - slice_range_to_mask(addr, len, &mask); available = *slice_mask_for_size(mm, psize); #ifdef CONFIG_PPC_64K_PAGES /* We need to account for 4k slices too */ @@ -795,6 +800,6 @@ int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr, slice_print_mask(" mask", &mask); slice_print_mask(" available", &available); #endif - return !slice_check_fit(mm, &mask, &available); + return !slice_check_range_fits(mm, &available, addr, len); } #endif
Rather than build slice masks from a range then use that to check for fit in a candidate mask, implement slice_check_range_fits that checks if a range fits in a mask directly. This allows several structures to be removed from stacks, and also we don't expect a huge range in a lot of these cases, so building and comparing a full mask is going to be more expensive than testing just one or two bits of the range. On POWER8, this increases vfork+exec+exit performance by 0.3% and reduces time to mmap+munmap a 64kB page by 5%. Signed-off-by: Nicholas Piggin <npiggin@gmail.com> --- arch/powerpc/mm/slice.c | 63 ++++++++++++++++++++++++++----------------------- 1 file changed, 34 insertions(+), 29 deletions(-)