Message ID | 1504910713-7094-5-git-send-email-linuxram@us.ibm.com (mailing list archive) |
---|---|
State | Changes Requested |
Headers | show |
Series | powerpc: Free up RPAGE_RSV bits | expand |
On Fri, 8 Sep 2017 15:44:44 -0700 Ram Pai <linuxram@us.ibm.com> wrote: > Rearrange 64K PTE bits to free up bits 3, 4, 5 and 6 > in the 64K backed HPTE pages. This along with the earlier > patch will entirely free up the four bits from 64K PTE. > The bit numbers are big-endian as defined in the ISA3.0 > > This patch does the following change to 64K PTE backed > by 64K HPTE. > > H_PAGE_F_SECOND (S) which occupied bit 4 moves to the > second part of the pte to bit 60. > H_PAGE_F_GIX (G,I,X) which occupied bit 5, 6 and 7 also > moves to the second part of the pte to bit 61, > 62, 63, 64 respectively > > since bit 7 is now freed up, we move H_PAGE_BUSY (B) from > bit 9 to bit 7. > > The second part of the PTE will hold > (H_PAGE_F_SECOND|H_PAGE_F_GIX) at bit 60,61,62,63. > NOTE: None of the bits in the secondary PTE were not used > by 64k-HPTE backed PTE. > > Before the patch, the 64K HPTE backed 64k PTE format was > as follows > > 0 1 2 3 4 5 6 7 8 9 10...........................63 > : : : : : : : : : : : : > v v v v v v v v v v v v > > ,-,-,-,-,--,--,--,--,-,-,-,-,-,------------------,-,-,-, > |x|x|x| |S |G |I |X |x|B| |x|x|................|x|x|x|x| <- primary pte > '_'_'_'_'__'__'__'__'_'_'_'_'_'________________'_'_'_'_' > | | | | | | | | | | | | |..................| | | | | <- secondary pte > '_'_'_'_'__'__'__'__'_'_'_'_'__________________'_'_'_'_' > > After the patch, the 64k HPTE backed 64k PTE format is > as follows > > 0 1 2 3 4 5 6 7 8 9 10...........................63 > : : : : : : : : : : : : > v v v v v v v v v v v v > > ,-,-,-,-,--,--,--,--,-,-,-,-,-,------------------,-,-,-, > |x|x|x| | | | |B |x| | |x|x|................|.|.|.|.| <- primary pte > '_'_'_'_'__'__'__'__'_'_'_'_'_'________________'_'_'_'_' > | | | | | | | | | | | | |..................|S|G|I|X| <- secondary pte > '_'_'_'_'__'__'__'__'_'_'_'_'__________________'_'_'_'_' > > The above PTE changes is applicable to hugetlbpages aswell. 
> > The patch does the following code changes: > > a) moves the H_PAGE_F_SECOND and H_PAGE_F_GIX to 4k PTE > header since it is no more needed b the 64k PTEs. > b) abstracts out __real_pte() and __rpte_to_hidx() so the > caller need not know the bit location of the slot. > c) moves the slot bits to the secondary pte. > > Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> > Signed-off-by: Ram Pai <linuxram@us.ibm.com> > --- > arch/powerpc/include/asm/book3s/64/hash-4k.h | 3 ++ > arch/powerpc/include/asm/book3s/64/hash-64k.h | 29 +++++++++++------------- > arch/powerpc/include/asm/book3s/64/hash.h | 3 -- > arch/powerpc/mm/hash64_64k.c | 23 ++++++++----------- > arch/powerpc/mm/hugetlbpage-hash64.c | 18 ++++++--------- > 5 files changed, 33 insertions(+), 43 deletions(-) > > diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h > index e66bfeb..dc153c6 100644 > --- a/arch/powerpc/include/asm/book3s/64/hash-4k.h > +++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h > @@ -16,6 +16,9 @@ > #define H_PUD_TABLE_SIZE (sizeof(pud_t) << H_PUD_INDEX_SIZE) > #define H_PGD_TABLE_SIZE (sizeof(pgd_t) << H_PGD_INDEX_SIZE) > > +#define H_PAGE_F_GIX_SHIFT 56 > +#define H_PAGE_F_SECOND _RPAGE_RSV2 /* HPTE is in 2ndary HPTEG */ > +#define H_PAGE_F_GIX (_RPAGE_RSV3 | _RPAGE_RSV4 | _RPAGE_RPN44) > #define H_PAGE_BUSY _RPAGE_RSV1 /* software: PTE & hash are busy */ > > /* PTE flags to conserve for HPTE identification */ > diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h > index e038f1c..89ef5a9 100644 > --- a/arch/powerpc/include/asm/book3s/64/hash-64k.h > +++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h > @@ -12,7 +12,7 @@ > */ > #define H_PAGE_COMBO _RPAGE_RPN0 /* this is a combo 4k page */ > #define H_PAGE_4K_PFN _RPAGE_RPN1 /* PFN is for a single 4k page */ > -#define H_PAGE_BUSY _RPAGE_RPN42 /* software: PTE & hash are busy */ > +#define H_PAGE_BUSY _RPAGE_RPN44 
/* software: PTE & hash are busy */ > > /* > * We need to differentiate between explicit huge page and THP huge > @@ -21,8 +21,7 @@ > #define H_PAGE_THP_HUGE H_PAGE_4K_PFN > > /* PTE flags to conserve for HPTE identification */ > -#define _PAGE_HPTEFLAGS (H_PAGE_BUSY | H_PAGE_F_SECOND | \ > - H_PAGE_F_GIX | H_PAGE_HASHPTE | H_PAGE_COMBO) > +#define _PAGE_HPTEFLAGS (H_PAGE_BUSY | H_PAGE_HASHPTE | H_PAGE_COMBO) > /* > * we support 16 fragments per PTE page of 64K size. > */ > @@ -50,24 +49,22 @@ static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep) > unsigned long *hidxp; > > rpte.pte = pte; > - rpte.hidx = 0; > - if (pte_val(pte) & H_PAGE_COMBO) { > - /* > - * Make sure we order the hidx load against the H_PAGE_COMBO > - * check. The store side ordering is done in __hash_page_4K > - */ > - smp_rmb(); > - hidxp = (unsigned long *)(ptep + PTRS_PER_PTE); > - rpte.hidx = *hidxp; > - } > + /* > + * Ensure that we do not read the hidx before we read > + * the pte. Because the writer side is expected > + * to finish writing the hidx first followed by the pte, > + * by using smp_wmb(). > + * pte_set_hash_slot() ensures that. 
> + */ > + smp_rmb(); > + hidxp = (unsigned long *)(ptep + PTRS_PER_PTE); > + rpte.hidx = *hidxp; > return rpte; > } > > static inline unsigned long __rpte_to_hidx(real_pte_t rpte, unsigned long index) > { > - if ((pte_val(rpte.pte) & H_PAGE_COMBO)) > - return (rpte.hidx >> (index<<2)) & 0xf; > - return (pte_val(rpte.pte) >> H_PAGE_F_GIX_SHIFT) & 0xf; > + return ((rpte.hidx >> (index<<2)) & 0xfUL); > } > > /* > diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h > index 8ce4112..46f3a23 100644 > --- a/arch/powerpc/include/asm/book3s/64/hash.h > +++ b/arch/powerpc/include/asm/book3s/64/hash.h > @@ -8,9 +8,6 @@ > * > */ > #define H_PTE_NONE_MASK _PAGE_HPTEFLAGS > -#define H_PAGE_F_GIX_SHIFT 56 > -#define H_PAGE_F_SECOND _RPAGE_RSV2 /* HPTE is in 2ndary HPTEG */ > -#define H_PAGE_F_GIX (_RPAGE_RSV3 | _RPAGE_RSV4 | _RPAGE_RPN44) > #define H_PAGE_HASHPTE _RPAGE_RPN43 /* PTE has associated HPTE */ > > #ifdef CONFIG_PPC_64K_PAGES > diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c > index c6c5559..9c63844 100644 > --- a/arch/powerpc/mm/hash64_64k.c > +++ b/arch/powerpc/mm/hash64_64k.c > @@ -103,8 +103,8 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, > * On hash insert failure we use old pte value and we don't > * want slot information there if we have a insert failure. > */ > - old_pte &= ~(H_PAGE_HASHPTE | H_PAGE_F_GIX | H_PAGE_F_SECOND); > - new_pte &= ~(H_PAGE_HASHPTE | H_PAGE_F_GIX | H_PAGE_F_SECOND); > + old_pte &= ~H_PAGE_HASHPTE; > + new_pte &= ~H_PAGE_HASHPTE; Shouldn't we set old/new_pte.slot = invalid? 
via rpte.hidx > goto htab_insert_hpte; > } > /* > @@ -227,6 +227,7 @@ int __hash_page_64K(unsigned long ea, unsigned long access, > unsigned long vsid, pte_t *ptep, unsigned long trap, > unsigned long flags, int ssize) > { > + real_pte_t rpte; > unsigned long hpte_group; > unsigned long rflags, pa; > unsigned long old_pte, new_pte; > @@ -263,6 +264,7 @@ int __hash_page_64K(unsigned long ea, unsigned long access, > } while (!pte_xchg(ptep, __pte(old_pte), __pte(new_pte))); > > rflags = htab_convert_pte_flags(new_pte); > + rpte = __real_pte(__pte(old_pte), ptep); > > if (cpu_has_feature(CPU_FTR_NOEXECUTE) && > !cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) > @@ -270,18 +272,13 @@ int __hash_page_64K(unsigned long ea, unsigned long access, > > vpn = hpt_vpn(ea, vsid, ssize); > if (unlikely(old_pte & H_PAGE_HASHPTE)) { > + unsigned long gslot; > /* > * There MIGHT be an HPTE for this pte > */ > - hash = hpt_hash(vpn, shift, ssize); > - if (old_pte & H_PAGE_F_SECOND) > - hash = ~hash; > - slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; > - slot += (old_pte & H_PAGE_F_GIX) >> H_PAGE_F_GIX_SHIFT; > - > - if (mmu_hash_ops.hpte_updatepp(slot, rflags, vpn, MMU_PAGE_64K, > - MMU_PAGE_64K, ssize, > - flags) == -1) > + gslot = pte_get_hash_gslot(vpn, shift, ssize, rpte, 0); > + if (mmu_hash_ops.hpte_updatepp(gslot, rflags, vpn, MMU_PAGE_64K, > + MMU_PAGE_64K, ssize, flags) == -1) > old_pte &= ~_PAGE_HPTEFLAGS; > } > > @@ -328,9 +325,9 @@ int __hash_page_64K(unsigned long ea, unsigned long access, > MMU_PAGE_64K, MMU_PAGE_64K, old_pte); > return -1; > } > + > new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE; > - new_pte |= (slot << H_PAGE_F_GIX_SHIFT) & > - (H_PAGE_F_SECOND | H_PAGE_F_GIX); > + new_pte |= pte_set_hash_slot(ptep, rpte, 0, slot); > } > *ptep = __pte(new_pte & ~H_PAGE_BUSY); > return 0; > diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c > index a84bb44..d52d667 100644 > --- a/arch/powerpc/mm/hugetlbpage-hash64.c > +++ 
b/arch/powerpc/mm/hugetlbpage-hash64.c > @@ -22,6 +22,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, > pte_t *ptep, unsigned long trap, unsigned long flags, > int ssize, unsigned int shift, unsigned int mmu_psize) > { > + real_pte_t rpte; > unsigned long vpn; > unsigned long old_pte, new_pte; > unsigned long rflags, pa, sz; > @@ -61,6 +62,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, > } while(!pte_xchg(ptep, __pte(old_pte), __pte(new_pte))); > > rflags = htab_convert_pte_flags(new_pte); > + rpte = __real_pte(__pte(old_pte), ptep); > > sz = ((1UL) << shift); > if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) > @@ -71,16 +73,11 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, > /* Check if pte already has an hpte (case 2) */ > if (unlikely(old_pte & H_PAGE_HASHPTE)) { > /* There MIGHT be an HPTE for this pte */ > - unsigned long hash, slot; > + unsigned long gslot; > > - hash = hpt_hash(vpn, shift, ssize); > - if (old_pte & H_PAGE_F_SECOND) > - hash = ~hash; > - slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; > - slot += (old_pte & H_PAGE_F_GIX) >> H_PAGE_F_GIX_SHIFT; > - > - if (mmu_hash_ops.hpte_updatepp(slot, rflags, vpn, mmu_psize, > - mmu_psize, ssize, flags) == -1) > + gslot = pte_get_hash_gslot(vpn, shift, ssize, rpte, 0); > + if (mmu_hash_ops.hpte_updatepp(gslot, rflags, vpn, mmu_psize, > + mmu_psize, ssize, flags) == -1) > old_pte &= ~_PAGE_HPTEFLAGS; > } > > @@ -106,8 +103,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, > return -1; > } > > - new_pte |= (slot << H_PAGE_F_GIX_SHIFT) & > - (H_PAGE_F_SECOND | H_PAGE_F_GIX); > + new_pte |= pte_set_hash_slot(ptep, rpte, 0, slot); > } > > /* Balbir
On Fri, 2017-09-08 at 15:44 -0700, Ram Pai wrote: > The second part of the PTE will hold > (H_PAGE_F_SECOND|H_PAGE_F_GIX) at bit 60,61,62,63. > NOTE: None of the bits in the secondary PTE were not used > by 64k-HPTE backed PTE. Have you measured the performance impact of this ? The second part of the PTE being in a different cache line there could be one... Cheers, Ben.
On Thu, Sep 14, 2017 at 11:44:49AM +1000, Balbir Singh wrote: > On Fri, 8 Sep 2017 15:44:44 -0700 > Ram Pai <linuxram@us.ibm.com> wrote: > > > Rearrange 64K PTE bits to free up bits 3, 4, 5 and 6 > > in the 64K backed HPTE pages. This along with the earlier > > patch will entirely free up the four bits from 64K PTE. > > The bit numbers are big-endian as defined in the ISA3.0 > > > > This patch does the following change to 64K PTE backed > > by 64K HPTE. > > > > H_PAGE_F_SECOND (S) which occupied bit 4 moves to the > > second part of the pte to bit 60. > > H_PAGE_F_GIX (G,I,X) which occupied bit 5, 6 and 7 also > > moves to the second part of the pte to bit 61, > > 62, 63, 64 respectively > > > > since bit 7 is now freed up, we move H_PAGE_BUSY (B) from > > bit 9 to bit 7. > > > > The second part of the PTE will hold > > (H_PAGE_F_SECOND|H_PAGE_F_GIX) at bit 60,61,62,63. > > NOTE: None of the bits in the secondary PTE were not used > > by 64k-HPTE backed PTE. > > > > Before the patch, the 64K HPTE backed 64k PTE format was > > as follows > > > > 0 1 2 3 4 5 6 7 8 9 10...........................63 > > : : : : : : : : : : : : > > v v v v v v v v v v v v > > > > ,-,-,-,-,--,--,--,--,-,-,-,-,-,------------------,-,-,-, > > |x|x|x| |S |G |I |X |x|B| |x|x|................|x|x|x|x| <- primary pte > > '_'_'_'_'__'__'__'__'_'_'_'_'_'________________'_'_'_'_' > > | | | | | | | | | | | | |..................| | | | | <- secondary pte > > '_'_'_'_'__'__'__'__'_'_'_'_'__________________'_'_'_'_' > > > > After the patch, the 64k HPTE backed 64k PTE format is > > as follows > > > > 0 1 2 3 4 5 6 7 8 9 10...........................63 > > : : : : : : : : : : : : > > v v v v v v v v v v v v > > > > ,-,-,-,-,--,--,--,--,-,-,-,-,-,------------------,-,-,-, > > |x|x|x| | | | |B |x| | |x|x|................|.|.|.|.| <- primary pte > > '_'_'_'_'__'__'__'__'_'_'_'_'_'________________'_'_'_'_' > > | | | | | | | | | | | | |..................|S|G|I|X| <- secondary pte > > 
'_'_'_'_'__'__'__'__'_'_'_'_'__________________'_'_'_'_' > > > > The above PTE changes is applicable to hugetlbpages aswell. > > > > The patch does the following code changes: > > > > a) moves the H_PAGE_F_SECOND and H_PAGE_F_GIX to 4k PTE > > header since it is no more needed b the 64k PTEs. > > b) abstracts out __real_pte() and __rpte_to_hidx() so the > > caller need not know the bit location of the slot. > > c) moves the slot bits to the secondary pte. > > > > Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> > > Signed-off-by: Ram Pai <linuxram@us.ibm.com> > > --- > > arch/powerpc/include/asm/book3s/64/hash-4k.h | 3 ++ > > arch/powerpc/include/asm/book3s/64/hash-64k.h | 29 +++++++++++------------- > > arch/powerpc/include/asm/book3s/64/hash.h | 3 -- > > arch/powerpc/mm/hash64_64k.c | 23 ++++++++----------- > > arch/powerpc/mm/hugetlbpage-hash64.c | 18 ++++++--------- > > 5 files changed, 33 insertions(+), 43 deletions(-) > > > > diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h > > index e66bfeb..dc153c6 100644 > > --- a/arch/powerpc/include/asm/book3s/64/hash-4k.h > > +++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h > > @@ -16,6 +16,9 @@ > > #define H_PUD_TABLE_SIZE (sizeof(pud_t) << H_PUD_INDEX_SIZE) > > #define H_PGD_TABLE_SIZE (sizeof(pgd_t) << H_PGD_INDEX_SIZE) > > > > +#define H_PAGE_F_GIX_SHIFT 56 > > +#define H_PAGE_F_SECOND _RPAGE_RSV2 /* HPTE is in 2ndary HPTEG */ > > +#define H_PAGE_F_GIX (_RPAGE_RSV3 | _RPAGE_RSV4 | _RPAGE_RPN44) > > #define H_PAGE_BUSY _RPAGE_RSV1 /* software: PTE & hash are busy */ > > > > /* PTE flags to conserve for HPTE identification */ > > diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h > > index e038f1c..89ef5a9 100644 > > --- a/arch/powerpc/include/asm/book3s/64/hash-64k.h > > +++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h > > @@ -12,7 +12,7 @@ > > */ > > #define H_PAGE_COMBO _RPAGE_RPN0 /* 
this is a combo 4k page */ > > #define H_PAGE_4K_PFN _RPAGE_RPN1 /* PFN is for a single 4k page */ > > -#define H_PAGE_BUSY _RPAGE_RPN42 /* software: PTE & hash are busy */ > > +#define H_PAGE_BUSY _RPAGE_RPN44 /* software: PTE & hash are busy */ > > > > /* > > * We need to differentiate between explicit huge page and THP huge > > @@ -21,8 +21,7 @@ > > #define H_PAGE_THP_HUGE H_PAGE_4K_PFN > > > > /* PTE flags to conserve for HPTE identification */ > > -#define _PAGE_HPTEFLAGS (H_PAGE_BUSY | H_PAGE_F_SECOND | \ > > - H_PAGE_F_GIX | H_PAGE_HASHPTE | H_PAGE_COMBO) > > +#define _PAGE_HPTEFLAGS (H_PAGE_BUSY | H_PAGE_HASHPTE | H_PAGE_COMBO) > > /* > > * we support 16 fragments per PTE page of 64K size. > > */ > > @@ -50,24 +49,22 @@ static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep) > > unsigned long *hidxp; > > > > rpte.pte = pte; > > - rpte.hidx = 0; > > - if (pte_val(pte) & H_PAGE_COMBO) { > > - /* > > - * Make sure we order the hidx load against the H_PAGE_COMBO > > - * check. The store side ordering is done in __hash_page_4K > > - */ > > - smp_rmb(); > > - hidxp = (unsigned long *)(ptep + PTRS_PER_PTE); > > - rpte.hidx = *hidxp; > > - } > > + /* > > + * Ensure that we do not read the hidx before we read > > + * the pte. Because the writer side is expected > > + * to finish writing the hidx first followed by the pte, > > + * by using smp_wmb(). > > + * pte_set_hash_slot() ensures that. 
> > + */ > > + smp_rmb(); > > + hidxp = (unsigned long *)(ptep + PTRS_PER_PTE); > > + rpte.hidx = *hidxp; > > return rpte; > > } > > > > static inline unsigned long __rpte_to_hidx(real_pte_t rpte, unsigned long index) > > { > > - if ((pte_val(rpte.pte) & H_PAGE_COMBO)) > > - return (rpte.hidx >> (index<<2)) & 0xf; > > - return (pte_val(rpte.pte) >> H_PAGE_F_GIX_SHIFT) & 0xf; > > + return ((rpte.hidx >> (index<<2)) & 0xfUL); > > } > > > > /* > > diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h > > index 8ce4112..46f3a23 100644 > > --- a/arch/powerpc/include/asm/book3s/64/hash.h > > +++ b/arch/powerpc/include/asm/book3s/64/hash.h > > @@ -8,9 +8,6 @@ > > * > > */ > > #define H_PTE_NONE_MASK _PAGE_HPTEFLAGS > > -#define H_PAGE_F_GIX_SHIFT 56 > > -#define H_PAGE_F_SECOND _RPAGE_RSV2 /* HPTE is in 2ndary HPTEG */ > > -#define H_PAGE_F_GIX (_RPAGE_RSV3 | _RPAGE_RSV4 | _RPAGE_RPN44) > > #define H_PAGE_HASHPTE _RPAGE_RPN43 /* PTE has associated HPTE */ > > > > #ifdef CONFIG_PPC_64K_PAGES > > diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c > > index c6c5559..9c63844 100644 > > --- a/arch/powerpc/mm/hash64_64k.c > > +++ b/arch/powerpc/mm/hash64_64k.c > > @@ -103,8 +103,8 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, > > * On hash insert failure we use old pte value and we don't > > * want slot information there if we have a insert failure. > > */ > > - old_pte &= ~(H_PAGE_HASHPTE | H_PAGE_F_GIX | H_PAGE_F_SECOND); > > - new_pte &= ~(H_PAGE_HASHPTE | H_PAGE_F_GIX | H_PAGE_F_SECOND); > > + old_pte &= ~H_PAGE_HASHPTE; > > + new_pte &= ~H_PAGE_HASHPTE; > > Shouldn't we set old/new_pte.slot = invalid? via rpte.hidx by resetting the H_PAGE_HASHPTE flag, we are invalidating slot information. Would that not be sufficient? 
RP > > > goto htab_insert_hpte; > > } > > /* > > @@ -227,6 +227,7 @@ int __hash_page_64K(unsigned long ea, unsigned long access, > > unsigned long vsid, pte_t *ptep, unsigned long trap, > > unsigned long flags, int ssize) > > { > > + real_pte_t rpte; > > unsigned long hpte_group; > > unsigned long rflags, pa; > > unsigned long old_pte, new_pte; > > @@ -263,6 +264,7 @@ int __hash_page_64K(unsigned long ea, unsigned long access, > > } while (!pte_xchg(ptep, __pte(old_pte), __pte(new_pte))); > > > > rflags = htab_convert_pte_flags(new_pte); > > + rpte = __real_pte(__pte(old_pte), ptep); > > > > if (cpu_has_feature(CPU_FTR_NOEXECUTE) && > > !cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) > > @@ -270,18 +272,13 @@ int __hash_page_64K(unsigned long ea, unsigned long access, > > > > vpn = hpt_vpn(ea, vsid, ssize); > > if (unlikely(old_pte & H_PAGE_HASHPTE)) { > > + unsigned long gslot; > > /* > > * There MIGHT be an HPTE for this pte > > */ > > - hash = hpt_hash(vpn, shift, ssize); > > - if (old_pte & H_PAGE_F_SECOND) > > - hash = ~hash; > > - slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; > > - slot += (old_pte & H_PAGE_F_GIX) >> H_PAGE_F_GIX_SHIFT; > > - > > - if (mmu_hash_ops.hpte_updatepp(slot, rflags, vpn, MMU_PAGE_64K, > > - MMU_PAGE_64K, ssize, > > - flags) == -1) > > + gslot = pte_get_hash_gslot(vpn, shift, ssize, rpte, 0); > > + if (mmu_hash_ops.hpte_updatepp(gslot, rflags, vpn, MMU_PAGE_64K, > > + MMU_PAGE_64K, ssize, flags) == -1) > > old_pte &= ~_PAGE_HPTEFLAGS; > > } > > > > @@ -328,9 +325,9 @@ int __hash_page_64K(unsigned long ea, unsigned long access, > > MMU_PAGE_64K, MMU_PAGE_64K, old_pte); > > return -1; > > } > > + > > new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE; > > - new_pte |= (slot << H_PAGE_F_GIX_SHIFT) & > > - (H_PAGE_F_SECOND | H_PAGE_F_GIX); > > + new_pte |= pte_set_hash_slot(ptep, rpte, 0, slot); > > } > > *ptep = __pte(new_pte & ~H_PAGE_BUSY); > > return 0; > > diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c 
b/arch/powerpc/mm/hugetlbpage-hash64.c > > index a84bb44..d52d667 100644 > > --- a/arch/powerpc/mm/hugetlbpage-hash64.c > > +++ b/arch/powerpc/mm/hugetlbpage-hash64.c > > @@ -22,6 +22,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, > > pte_t *ptep, unsigned long trap, unsigned long flags, > > int ssize, unsigned int shift, unsigned int mmu_psize) > > { > > + real_pte_t rpte; > > unsigned long vpn; > > unsigned long old_pte, new_pte; > > unsigned long rflags, pa, sz; > > @@ -61,6 +62,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, > > } while(!pte_xchg(ptep, __pte(old_pte), __pte(new_pte))); > > > > rflags = htab_convert_pte_flags(new_pte); > > + rpte = __real_pte(__pte(old_pte), ptep); > > > > sz = ((1UL) << shift); > > if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) > > @@ -71,16 +73,11 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, > > /* Check if pte already has an hpte (case 2) */ > > if (unlikely(old_pte & H_PAGE_HASHPTE)) { > > /* There MIGHT be an HPTE for this pte */ > > - unsigned long hash, slot; > > + unsigned long gslot; > > > > - hash = hpt_hash(vpn, shift, ssize); > > - if (old_pte & H_PAGE_F_SECOND) > > - hash = ~hash; > > - slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; > > - slot += (old_pte & H_PAGE_F_GIX) >> H_PAGE_F_GIX_SHIFT; > > - > > - if (mmu_hash_ops.hpte_updatepp(slot, rflags, vpn, mmu_psize, > > - mmu_psize, ssize, flags) == -1) > > + gslot = pte_get_hash_gslot(vpn, shift, ssize, rpte, 0); > > + if (mmu_hash_ops.hpte_updatepp(gslot, rflags, vpn, mmu_psize, > > + mmu_psize, ssize, flags) == -1) > > old_pte &= ~_PAGE_HPTEFLAGS; > > } > > > > @@ -106,8 +103,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, > > return -1; > > } > > > > - new_pte |= (slot << H_PAGE_F_GIX_SHIFT) & > > - (H_PAGE_F_SECOND | H_PAGE_F_GIX); > > + new_pte |= pte_set_hash_slot(ptep, rpte, 0, slot); > > } > > > > /* > > 
Balbir
On Thu, Sep 14, 2017 at 10:54:08AM -0700, Ram Pai wrote: > On Thu, Sep 14, 2017 at 11:44:49AM +1000, Balbir Singh wrote: > > On Fri, 8 Sep 2017 15:44:44 -0700 > > Ram Pai <linuxram@us.ibm.com> wrote: > > > > > Rearrange 64K PTE bits to free up bits 3, 4, 5 and 6 > > > in the 64K backed HPTE pages. This along with the earlier > > > patch will entirely free up the four bits from 64K PTE. > > > The bit numbers are big-endian as defined in the ISA3.0 > > > > > > This patch does the following change to 64K PTE backed > > > by 64K HPTE. > > > > > > H_PAGE_F_SECOND (S) which occupied bit 4 moves to the > > > second part of the pte to bit 60. > > > H_PAGE_F_GIX (G,I,X) which occupied bit 5, 6 and 7 also > > > moves to the second part of the pte to bit 61, > > > 62, 63, 64 respectively > > > > > > since bit 7 is now freed up, we move H_PAGE_BUSY (B) from > > > bit 9 to bit 7. > > > > > > The second part of the PTE will hold > > > (H_PAGE_F_SECOND|H_PAGE_F_GIX) at bit 60,61,62,63. > > > NOTE: None of the bits in the secondary PTE were not used > > > by 64k-HPTE backed PTE. 
> > > > > > Before the patch, the 64K HPTE backed 64k PTE format was > > > as follows > > > > > > 0 1 2 3 4 5 6 7 8 9 10...........................63 > > > : : : : : : : : : : : : > > > v v v v v v v v v v v v > > > > > > ,-,-,-,-,--,--,--,--,-,-,-,-,-,------------------,-,-,-, > > > |x|x|x| |S |G |I |X |x|B| |x|x|................|x|x|x|x| <- primary pte > > > '_'_'_'_'__'__'__'__'_'_'_'_'_'________________'_'_'_'_' > > > | | | | | | | | | | | | |..................| | | | | <- secondary pte > > > '_'_'_'_'__'__'__'__'_'_'_'_'__________________'_'_'_'_' > > > > > > After the patch, the 64k HPTE backed 64k PTE format is > > > as follows > > > > > > 0 1 2 3 4 5 6 7 8 9 10...........................63 > > > : : : : : : : : : : : : > > > v v v v v v v v v v v v > > > > > > ,-,-,-,-,--,--,--,--,-,-,-,-,-,------------------,-,-,-, > > > |x|x|x| | | | |B |x| | |x|x|................|.|.|.|.| <- primary pte > > > '_'_'_'_'__'__'__'__'_'_'_'_'_'________________'_'_'_'_' > > > | | | | | | | | | | | | |..................|S|G|I|X| <- secondary pte > > > '_'_'_'_'__'__'__'__'_'_'_'_'__________________'_'_'_'_' > > > > > > The above PTE changes is applicable to hugetlbpages aswell. > > > > > > The patch does the following code changes: > > > > > > a) moves the H_PAGE_F_SECOND and H_PAGE_F_GIX to 4k PTE > > > header since it is no more needed b the 64k PTEs. > > > b) abstracts out __real_pte() and __rpte_to_hidx() so the > > > caller need not know the bit location of the slot. > > > c) moves the slot bits to the secondary pte. 
> > > > > > Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> > > > Signed-off-by: Ram Pai <linuxram@us.ibm.com> > > > --- > > > arch/powerpc/include/asm/book3s/64/hash-4k.h | 3 ++ > > > arch/powerpc/include/asm/book3s/64/hash-64k.h | 29 +++++++++++------------- > > > arch/powerpc/include/asm/book3s/64/hash.h | 3 -- > > > arch/powerpc/mm/hash64_64k.c | 23 ++++++++----------- > > > arch/powerpc/mm/hugetlbpage-hash64.c | 18 ++++++--------- > > > 5 files changed, 33 insertions(+), 43 deletions(-) > > > > > > diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h > > > index e66bfeb..dc153c6 100644 > > > --- a/arch/powerpc/include/asm/book3s/64/hash-4k.h > > > +++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h > > > @@ -16,6 +16,9 @@ > > > #define H_PUD_TABLE_SIZE (sizeof(pud_t) << H_PUD_INDEX_SIZE) > > > #define H_PGD_TABLE_SIZE (sizeof(pgd_t) << H_PGD_INDEX_SIZE) > > > > > > +#define H_PAGE_F_GIX_SHIFT 56 > > > +#define H_PAGE_F_SECOND _RPAGE_RSV2 /* HPTE is in 2ndary HPTEG */ > > > +#define H_PAGE_F_GIX (_RPAGE_RSV3 | _RPAGE_RSV4 | _RPAGE_RPN44) > > > #define H_PAGE_BUSY _RPAGE_RSV1 /* software: PTE & hash are busy */ > > > > > > /* PTE flags to conserve for HPTE identification */ > > > diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h > > > index e038f1c..89ef5a9 100644 > > > --- a/arch/powerpc/include/asm/book3s/64/hash-64k.h > > > +++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h > > > @@ -12,7 +12,7 @@ > > > */ > > > #define H_PAGE_COMBO _RPAGE_RPN0 /* this is a combo 4k page */ > > > #define H_PAGE_4K_PFN _RPAGE_RPN1 /* PFN is for a single 4k page */ > > > -#define H_PAGE_BUSY _RPAGE_RPN42 /* software: PTE & hash are busy */ > > > +#define H_PAGE_BUSY _RPAGE_RPN44 /* software: PTE & hash are busy */ > > > > > > /* > > > * We need to differentiate between explicit huge page and THP huge > > > @@ -21,8 +21,7 @@ > > > #define H_PAGE_THP_HUGE 
H_PAGE_4K_PFN > > > > > > /* PTE flags to conserve for HPTE identification */ > > > -#define _PAGE_HPTEFLAGS (H_PAGE_BUSY | H_PAGE_F_SECOND | \ > > > - H_PAGE_F_GIX | H_PAGE_HASHPTE | H_PAGE_COMBO) > > > +#define _PAGE_HPTEFLAGS (H_PAGE_BUSY | H_PAGE_HASHPTE | H_PAGE_COMBO) > > > /* > > > * we support 16 fragments per PTE page of 64K size. > > > */ > > > @@ -50,24 +49,22 @@ static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep) > > > unsigned long *hidxp; > > > > > > rpte.pte = pte; > > > - rpte.hidx = 0; > > > - if (pte_val(pte) & H_PAGE_COMBO) { > > > - /* > > > - * Make sure we order the hidx load against the H_PAGE_COMBO > > > - * check. The store side ordering is done in __hash_page_4K > > > - */ > > > - smp_rmb(); > > > - hidxp = (unsigned long *)(ptep + PTRS_PER_PTE); > > > - rpte.hidx = *hidxp; > > > - } > > > + /* > > > + * Ensure that we do not read the hidx before we read > > > + * the pte. Because the writer side is expected > > > + * to finish writing the hidx first followed by the pte, > > > + * by using smp_wmb(). > > > + * pte_set_hash_slot() ensures that. 
> > > + */ > > > + smp_rmb(); > > > + hidxp = (unsigned long *)(ptep + PTRS_PER_PTE); > > > + rpte.hidx = *hidxp; > > > return rpte; > > > } > > > > > > static inline unsigned long __rpte_to_hidx(real_pte_t rpte, unsigned long index) > > > { > > > - if ((pte_val(rpte.pte) & H_PAGE_COMBO)) > > > - return (rpte.hidx >> (index<<2)) & 0xf; > > > - return (pte_val(rpte.pte) >> H_PAGE_F_GIX_SHIFT) & 0xf; > > > + return ((rpte.hidx >> (index<<2)) & 0xfUL); > > > } > > > > > > /* > > > diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h > > > index 8ce4112..46f3a23 100644 > > > --- a/arch/powerpc/include/asm/book3s/64/hash.h > > > +++ b/arch/powerpc/include/asm/book3s/64/hash.h > > > @@ -8,9 +8,6 @@ > > > * > > > */ > > > #define H_PTE_NONE_MASK _PAGE_HPTEFLAGS > > > -#define H_PAGE_F_GIX_SHIFT 56 > > > -#define H_PAGE_F_SECOND _RPAGE_RSV2 /* HPTE is in 2ndary HPTEG */ > > > -#define H_PAGE_F_GIX (_RPAGE_RSV3 | _RPAGE_RSV4 | _RPAGE_RPN44) > > > #define H_PAGE_HASHPTE _RPAGE_RPN43 /* PTE has associated HPTE */ > > > > > > #ifdef CONFIG_PPC_64K_PAGES > > > diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c > > > index c6c5559..9c63844 100644 > > > --- a/arch/powerpc/mm/hash64_64k.c > > > +++ b/arch/powerpc/mm/hash64_64k.c > > > @@ -103,8 +103,8 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, > > > * On hash insert failure we use old pte value and we don't > > > * want slot information there if we have a insert failure. > > > */ > > > - old_pte &= ~(H_PAGE_HASHPTE | H_PAGE_F_GIX | H_PAGE_F_SECOND); > > > - new_pte &= ~(H_PAGE_HASHPTE | H_PAGE_F_GIX | H_PAGE_F_SECOND); > > > + old_pte &= ~H_PAGE_HASHPTE; > > > + new_pte &= ~H_PAGE_HASHPTE; > > > > Shouldn't we set old/new_pte.slot = invalid? via rpte.hidx > > by resetting the H_PAGE_HASHPTE flag, we are invalidating > slot information. Would that not be sufficient? I think i misunderstood you question. 
Yes, rpte.hidx will have to be reset to invalid. The code does that further down in that function: if (!(old_pte & H_PAGE_COMBO)) rpte.hidx = ~0x0UL; RP
Benjamin Herrenschmidt <benh@kernel.crashing.org> writes: > On Fri, 2017-09-08 at 15:44 -0700, Ram Pai wrote: >> The second part of the PTE will hold >> (H_PAGE_F_SECOND|H_PAGE_F_GIX) at bit 60,61,62,63. >> NOTE: None of the bits in the secondary PTE were not used >> by 64k-HPTE backed PTE. > > Have you measured the performance impact of this ? The second part of > the PTE being in a different cache line there could be one... > I am also looking at a patch series removing the slot tracking completely. With address randomization turned off and no swap in guest/host and making sure we touched most of guest ram, I don't find much impact in performance when we don't track the slot at all. I will post the patch series with numbers in a day or two. But my test was while (5000) { mmap(128M) touch every page of 2048 pages munmap() } It could also be the best case in my run because I might have always found the hash pte slot in the primary. In one measurement with swap on and address randomization enabled, I did find a 50% impact. But then I was not able to recreate that again. So it could be something I did wrong in the test setup. Ram, Will you be able to get a test run with the above loop? -aneesh
On Thu, Sep 14, 2017 at 06:13:57PM +1000, Benjamin Herrenschmidt wrote: > On Fri, 2017-09-08 at 15:44 -0700, Ram Pai wrote: > > The second part of the PTE will hold > > (H_PAGE_F_SECOND|H_PAGE_F_GIX) at bit 60,61,62,63. > > NOTE: None of the bits in the secondary PTE were not used > > by 64k-HPTE backed PTE. > > Have you measured the performance impact of this ? The second part of > the PTE being in a different cache line there could be one... Hmm... missed responding to this comment. I did a preliminary measurement running mmap bench in the selftest. Ran it multiple times. Almost always the numbers were either equal-to or better-than without the patch-series. RP
On Mon, Oct 23, 2017 at 02:22:44PM +0530, Aneesh Kumar K.V wrote: > Benjamin Herrenschmidt <benh@kernel.crashing.org> writes: > > > On Fri, 2017-09-08 at 15:44 -0700, Ram Pai wrote: > >> The second part of the PTE will hold > >> (H_PAGE_F_SECOND|H_PAGE_F_GIX) at bit 60,61,62,63. > >> NOTE: None of the bits in the secondary PTE were not used > >> by 64k-HPTE backed PTE. > > > > Have you measured the performance impact of this ? The second part of > > the PTE being in a different cache line there could be one... > > > > I am also looking at a patch series removing the slot tracking > completely. With randomize address turned off and no swap in guest/host > and making sure we touched most of guest ram, I don't find much impact > in performance when we don't track the slot at all. I will post the > patch series with numbers in a day or two. But my test was > > while (5000) { > mmap(128M) > touch every page of 2048 pages > munmap() > } > > I could also be the best case in my run because i might have always > found the hash pte slot in the primary. In one measurement with swap on > and address randmization enabled, i did find a 50% impact. But then i > was not able to recreate that again. So could be something i did wrong > in the test setup. > > Ram, > > Will you be able to get a test run with the above loop? Yes. results with patch look good; better than w/o patch. 
/-----------------------------------------------\ |Itteratn| secs w/ patch |secs w/o patch | ------------------------------------------------- |1 | 45.572621 | 49.046994 | |2 | 46.049545 | 49.378756 | |3 | 46.103657 | 49.223591 | |4 | 46.298903 | 48.991245 | |5 | 46.353202 | 48.988033 | |6 | 45.440878 | 49.175846 | |7 | 46.860373 | 49.008395 | |8 | 46.221390 | 49.236964 | |9 | 45.794993 | 49.171927 | |10 | 46.569491 | 48.995628 | |-----------------------------------------------| |average | 46.1265053 | 49.1217379 | \-----------------------------------------------/ The code is as follows: diff --git a/tools/testing/selftests/powerpc/benchmarks/mmap_bench.c b/tools/testing/selftests/powerpc/benchmarks/mmap_bench.c index 8d084a2..ef2ad87 100644 --- a/tools/testing/selftests/powerpc/benchmarks/mmap_bench.c +++ b/tools/testing/selftests/powerpc/benchmarks/mmap_bench.c @@ -10,14 +10,14 @@ #include "utils.h" -#define ITERATIONS 5000000 +#define ITERATIONS 5000 #define MEMSIZE (128 * 1024 * 1024) int test_mmap(void) { struct timespec ts_start, ts_end; - unsigned long i = ITERATIONS; + unsigned long i = ITERATIONS, j; clock_gettime(CLOCK_MONOTONIC, &ts_start); @@ -25,6 +25,10 @@ int test_mmap(void) char *c = mmap(NULL, MEMSIZE, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); FAIL_IF(c == MAP_FAILED); + + for (j=0; j < (MEMSIZE >> 16); j++) + c[j<<16] = 0xf; + munmap(c, MEMSIZE); }
On 10/24/2017 12:52 AM, Ram Pai wrote: > On Thu, Sep 14, 2017 at 06:13:57PM +1000, Benjamin Herrenschmidt wrote: >> On Fri, 2017-09-08 at 15:44 -0700, Ram Pai wrote: >>> The second part of the PTE will hold >>> (H_PAGE_F_SECOND|H_PAGE_F_GIX) at bit 60,61,62,63. >>> NOTE: None of the bits in the secondary PTE were not used >>> by 64k-HPTE backed PTE. >> >> Have you measured the performance impact of this ? The second part of >> the PTE being in a different cache line there could be one... > > hmm..missed responding to this comment. > > I did a preliminay measurement running mmap bench in the selftest. > Ran it multiple times. almost always the numbers were either equal-to > or better-than without the patch-series. mmap bench doesn't trigger any page faults. It is just mmap/munmap in a loop. -aneesh
diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h index e66bfeb..dc153c6 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-4k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h @@ -16,6 +16,9 @@ #define H_PUD_TABLE_SIZE (sizeof(pud_t) << H_PUD_INDEX_SIZE) #define H_PGD_TABLE_SIZE (sizeof(pgd_t) << H_PGD_INDEX_SIZE) +#define H_PAGE_F_GIX_SHIFT 56 +#define H_PAGE_F_SECOND _RPAGE_RSV2 /* HPTE is in 2ndary HPTEG */ +#define H_PAGE_F_GIX (_RPAGE_RSV3 | _RPAGE_RSV4 | _RPAGE_RPN44) #define H_PAGE_BUSY _RPAGE_RSV1 /* software: PTE & hash are busy */ /* PTE flags to conserve for HPTE identification */ diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h index e038f1c..89ef5a9 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-64k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h @@ -12,7 +12,7 @@ */ #define H_PAGE_COMBO _RPAGE_RPN0 /* this is a combo 4k page */ #define H_PAGE_4K_PFN _RPAGE_RPN1 /* PFN is for a single 4k page */ -#define H_PAGE_BUSY _RPAGE_RPN42 /* software: PTE & hash are busy */ +#define H_PAGE_BUSY _RPAGE_RPN44 /* software: PTE & hash are busy */ /* * We need to differentiate between explicit huge page and THP huge @@ -21,8 +21,7 @@ #define H_PAGE_THP_HUGE H_PAGE_4K_PFN /* PTE flags to conserve for HPTE identification */ -#define _PAGE_HPTEFLAGS (H_PAGE_BUSY | H_PAGE_F_SECOND | \ - H_PAGE_F_GIX | H_PAGE_HASHPTE | H_PAGE_COMBO) +#define _PAGE_HPTEFLAGS (H_PAGE_BUSY | H_PAGE_HASHPTE | H_PAGE_COMBO) /* * we support 16 fragments per PTE page of 64K size. */ @@ -50,24 +49,22 @@ static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep) unsigned long *hidxp; rpte.pte = pte; - rpte.hidx = 0; - if (pte_val(pte) & H_PAGE_COMBO) { - /* - * Make sure we order the hidx load against the H_PAGE_COMBO - * check. 
The store side ordering is done in __hash_page_4K - */ - smp_rmb(); - hidxp = (unsigned long *)(ptep + PTRS_PER_PTE); - rpte.hidx = *hidxp; - } + /* + * Ensure that we do not read the hidx before we read + * the pte. Because the writer side is expected + * to finish writing the hidx first followed by the pte, + * by using smp_wmb(). + * pte_set_hash_slot() ensures that. + */ + smp_rmb(); + hidxp = (unsigned long *)(ptep + PTRS_PER_PTE); + rpte.hidx = *hidxp; return rpte; } static inline unsigned long __rpte_to_hidx(real_pte_t rpte, unsigned long index) { - if ((pte_val(rpte.pte) & H_PAGE_COMBO)) - return (rpte.hidx >> (index<<2)) & 0xf; - return (pte_val(rpte.pte) >> H_PAGE_F_GIX_SHIFT) & 0xf; + return ((rpte.hidx >> (index<<2)) & 0xfUL); } /* diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h index 8ce4112..46f3a23 100644 --- a/arch/powerpc/include/asm/book3s/64/hash.h +++ b/arch/powerpc/include/asm/book3s/64/hash.h @@ -8,9 +8,6 @@ * */ #define H_PTE_NONE_MASK _PAGE_HPTEFLAGS -#define H_PAGE_F_GIX_SHIFT 56 -#define H_PAGE_F_SECOND _RPAGE_RSV2 /* HPTE is in 2ndary HPTEG */ -#define H_PAGE_F_GIX (_RPAGE_RSV3 | _RPAGE_RSV4 | _RPAGE_RPN44) #define H_PAGE_HASHPTE _RPAGE_RPN43 /* PTE has associated HPTE */ #ifdef CONFIG_PPC_64K_PAGES diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c index c6c5559..9c63844 100644 --- a/arch/powerpc/mm/hash64_64k.c +++ b/arch/powerpc/mm/hash64_64k.c @@ -103,8 +103,8 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, * On hash insert failure we use old pte value and we don't * want slot information there if we have a insert failure. 
*/ - old_pte &= ~(H_PAGE_HASHPTE | H_PAGE_F_GIX | H_PAGE_F_SECOND); - new_pte &= ~(H_PAGE_HASHPTE | H_PAGE_F_GIX | H_PAGE_F_SECOND); + old_pte &= ~H_PAGE_HASHPTE; + new_pte &= ~H_PAGE_HASHPTE; goto htab_insert_hpte; } /* @@ -227,6 +227,7 @@ int __hash_page_64K(unsigned long ea, unsigned long access, unsigned long vsid, pte_t *ptep, unsigned long trap, unsigned long flags, int ssize) { + real_pte_t rpte; unsigned long hpte_group; unsigned long rflags, pa; unsigned long old_pte, new_pte; @@ -263,6 +264,7 @@ int __hash_page_64K(unsigned long ea, unsigned long access, } while (!pte_xchg(ptep, __pte(old_pte), __pte(new_pte))); rflags = htab_convert_pte_flags(new_pte); + rpte = __real_pte(__pte(old_pte), ptep); if (cpu_has_feature(CPU_FTR_NOEXECUTE) && !cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) @@ -270,18 +272,13 @@ int __hash_page_64K(unsigned long ea, unsigned long access, vpn = hpt_vpn(ea, vsid, ssize); if (unlikely(old_pte & H_PAGE_HASHPTE)) { + unsigned long gslot; /* * There MIGHT be an HPTE for this pte */ - hash = hpt_hash(vpn, shift, ssize); - if (old_pte & H_PAGE_F_SECOND) - hash = ~hash; - slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; - slot += (old_pte & H_PAGE_F_GIX) >> H_PAGE_F_GIX_SHIFT; - - if (mmu_hash_ops.hpte_updatepp(slot, rflags, vpn, MMU_PAGE_64K, - MMU_PAGE_64K, ssize, - flags) == -1) + gslot = pte_get_hash_gslot(vpn, shift, ssize, rpte, 0); + if (mmu_hash_ops.hpte_updatepp(gslot, rflags, vpn, MMU_PAGE_64K, + MMU_PAGE_64K, ssize, flags) == -1) old_pte &= ~_PAGE_HPTEFLAGS; } @@ -328,9 +325,9 @@ int __hash_page_64K(unsigned long ea, unsigned long access, MMU_PAGE_64K, MMU_PAGE_64K, old_pte); return -1; } + new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE; - new_pte |= (slot << H_PAGE_F_GIX_SHIFT) & - (H_PAGE_F_SECOND | H_PAGE_F_GIX); + new_pte |= pte_set_hash_slot(ptep, rpte, 0, slot); } *ptep = __pte(new_pte & ~H_PAGE_BUSY); return 0; diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c index 
a84bb44..d52d667 100644 --- a/arch/powerpc/mm/hugetlbpage-hash64.c +++ b/arch/powerpc/mm/hugetlbpage-hash64.c @@ -22,6 +22,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, pte_t *ptep, unsigned long trap, unsigned long flags, int ssize, unsigned int shift, unsigned int mmu_psize) { + real_pte_t rpte; unsigned long vpn; unsigned long old_pte, new_pte; unsigned long rflags, pa, sz; @@ -61,6 +62,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, } while(!pte_xchg(ptep, __pte(old_pte), __pte(new_pte))); rflags = htab_convert_pte_flags(new_pte); + rpte = __real_pte(__pte(old_pte), ptep); sz = ((1UL) << shift); if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) @@ -71,16 +73,11 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, /* Check if pte already has an hpte (case 2) */ if (unlikely(old_pte & H_PAGE_HASHPTE)) { /* There MIGHT be an HPTE for this pte */ - unsigned long hash, slot; + unsigned long gslot; - hash = hpt_hash(vpn, shift, ssize); - if (old_pte & H_PAGE_F_SECOND) - hash = ~hash; - slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; - slot += (old_pte & H_PAGE_F_GIX) >> H_PAGE_F_GIX_SHIFT; - - if (mmu_hash_ops.hpte_updatepp(slot, rflags, vpn, mmu_psize, - mmu_psize, ssize, flags) == -1) + gslot = pte_get_hash_gslot(vpn, shift, ssize, rpte, 0); + if (mmu_hash_ops.hpte_updatepp(gslot, rflags, vpn, mmu_psize, + mmu_psize, ssize, flags) == -1) old_pte &= ~_PAGE_HPTEFLAGS; } @@ -106,8 +103,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, return -1; } - new_pte |= (slot << H_PAGE_F_GIX_SHIFT) & - (H_PAGE_F_SECOND | H_PAGE_F_GIX); + new_pte |= pte_set_hash_slot(ptep, rpte, 0, slot); } /*