@@ -319,6 +319,11 @@ extern int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
pte_t *ptep, unsigned long trap, int local, int ssize,
unsigned int shift, unsigned int mmu_psize);
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+extern int __hash_page_thp(unsigned long ea, unsigned long access,
+ unsigned long vsid, pmd_t *pmdp, unsigned long trap,
+ int local, int ssize, unsigned int psize);
+#endif
extern void hash_failure_debug(unsigned long ea, unsigned long access,
unsigned long vsid, unsigned long trap,
int ssize, int psize, int lpsize,
@@ -350,39 +350,18 @@ static inline void pgtable_cache_add(unsigned shift, void (*ctor)(void *))
return __pgtable_cache_add(shift, sizeof(void *) << shift, ctor);
}
-/*
- * find_linux_pte returns the address of a linux pte for a given
- * effective address and directory. If not found, it returns zero.
- */
-static inline pte_t *find_linux_pte(pgd_t *pgdir, unsigned long ea)
-{
- pgd_t *pg;
- pud_t *pu;
- pmd_t *pm;
- pte_t *pt = NULL;
-
- pg = pgdir + pgd_index(ea);
- if (!pgd_none(*pg)) {
- pu = pud_offset(pg, ea);
- if (!pud_none(*pu)) {
- pm = pmd_offset(pu, ea);
- if (pmd_present(*pm))
- pt = pte_offset_kernel(pm, ea);
- }
- }
- return pt;
-}
-
-#ifdef CONFIG_HUGETLB_PAGE
+#if defined(CONFIG_HUGETLB_PAGE)
pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
- unsigned *shift);
+ unsigned *shift, unsigned int *thp);
#else
+pte_t *find_linux_pte(pgd_t *pgdir, unsigned long ea, unsigned int *thp);
static inline pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
- unsigned *shift)
+ unsigned *shift,
+ unsigned int *thp)
{
if (shift)
*shift = 0;
- return find_linux_pte(pgdir, ea);
+ return find_linux_pte(pgdir, ea, thp);
}
#endif /* !CONFIG_HUGETLB_PAGE */
@@ -70,7 +70,7 @@ struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr)
if (vaddr < PHB_IO_BASE || vaddr >= PHB_IO_END)
return NULL;
- ptep = find_linux_pte(init_mm.pgd, vaddr);
+ ptep = find_linux_pte(init_mm.pgd, vaddr, NULL);
if (ptep == NULL)
paddr = 0;
else
@@ -683,7 +683,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
*/
rcu_read_lock_sched();
ptep = find_linux_pte_or_hugepte(current->mm->pgd,
- hva, NULL);
+ hva, NULL, NULL);
if (ptep && pte_present(*ptep)) {
pte = kvmppc_read_update_linux_pte(ptep, 1);
if (pte_write(pte))
@@ -27,7 +27,7 @@ static void *real_vmalloc_addr(void *x)
unsigned long addr = (unsigned long) x;
pte_t *p;
- p = find_linux_pte(swapper_pg_dir, addr);
+ p = find_linux_pte(swapper_pg_dir, addr, NULL);
if (!p || !pte_present(*p))
return NULL;
/* assume we don't have huge pages in vmalloc space... */
@@ -145,6 +145,7 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index,
unlock_rmap(rmap);
}
+/* FIXME!! check */
static pte_t lookup_linux_pte(pgd_t *pgdir, unsigned long hva,
int writing, unsigned long *pte_sizep)
{
@@ -152,7 +153,7 @@ static pte_t lookup_linux_pte(pgd_t *pgdir, unsigned long hva,
unsigned long ps = *pte_sizep;
unsigned int shift;
- ptep = find_linux_pte_or_hugepte(pgdir, hva, &shift);
+ ptep = find_linux_pte_or_hugepte(pgdir, hva, &shift, NULL);
if (!ptep)
return __pte(0);
if (shift)
@@ -33,6 +33,7 @@ obj-y += hugetlbpage.o
obj-$(CONFIG_PPC_STD_MMU_64) += hugetlbpage-hash64.o
obj-$(CONFIG_PPC_BOOK3E_MMU) += hugetlbpage-book3e.o
endif
+obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += largepage-hash64.o
obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage-prot.o
obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o
obj-$(CONFIG_HIGHMEM) += highmem.o
@@ -939,7 +939,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
unsigned long vsid;
struct mm_struct *mm;
pte_t *ptep;
- unsigned hugeshift;
+ unsigned hugeshift, thp;
const struct cpumask *tmp;
int rc, user_region = 0, local = 0;
int psize, ssize;
@@ -1005,7 +1005,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
#endif /* CONFIG_PPC_64K_PAGES */
/* Get PTE and page size from page tables */
- ptep = find_linux_pte_or_hugepte(pgdir, ea, &hugeshift);
+ ptep = find_linux_pte_or_hugepte(pgdir, ea, &hugeshift, &thp);
if (ptep == NULL || !pte_present(*ptep)) {
DBG_LOW(" no PTE !\n");
return 1;
@@ -1028,6 +1028,12 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
ssize, hugeshift, psize);
#endif /* CONFIG_HUGETLB_PAGE */
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ if (thp)
+ return __hash_page_thp(ea, access, vsid, (pmd_t *)ptep,
+ trap, local, ssize, psize);
+#endif
+
#ifndef CONFIG_PPC_64K_PAGES
DBG_LOW(" i-pte: %016lx\n", pte_val(*ptep));
#else
@@ -1133,7 +1139,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
pgdir = mm->pgd;
if (pgdir == NULL)
return;
- ptep = find_linux_pte(pgdir, ea);
+ ptep = find_linux_pte(pgdir, ea, NULL);
if (!ptep)
return;
@@ -67,7 +67,8 @@ static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize)
#define hugepd_none(hpd) ((hpd).pd == 0)
-pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift)
+pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
+ unsigned *shift, unsigned int *thp)
{
pgd_t *pg;
pud_t *pu;
@@ -77,6 +78,8 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift
if (shift)
*shift = 0;
+ if (thp)
+ *thp = 0;
pg = pgdir + pgd_index(ea);
if (is_hugepd(pg)) {
@@ -91,12 +94,20 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift
pm = pmd_offset(pu, ea);
if (is_hugepd(pm))
hpdp = (hugepd_t *)pm;
- else if (!pmd_none(*pm)) {
+ else if (pmd_large(*pm)) {
+ /* THP page */
+ if (thp)
+ *thp = 1;
+ /*
+ * This should be ok, except for few flags
+ * most of the pte, large page pmd bits map
+ */
+ return (pte_t *)pm;
+ } else if (!pmd_none(*pm)) {
return pte_offset_kernel(pm, ea);
}
}
}
-
if (!hpdp)
return NULL;
@@ -614,7 +625,7 @@ follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
unsigned shift;
unsigned long mask;
- ptep = find_linux_pte_or_hugepte(mm->pgd, address, &shift);
+ ptep = find_linux_pte_or_hugepte(mm->pgd, address, &shift, NULL);
/* Verify it is a huge page else bail. */
if (!ptep || !shift)
new file mode 100644
@@ -0,0 +1,170 @@
+/*
+ * PPC64 THP Support for hash based MMUs
+ */
+
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/cacheflush.h>
+#include <asm/machdep.h>
+#include <asm/udbg.h>
+
+/*
+ * A linux huge page PMD was changed and the corresponding hash table entry
+ * neesd to be flushed. FIXME!! there is no batching support yet.
+ *
+ * The linux huge page PMD now include the pmd entries followed by the address
+ * to the stashed pgtable_t. The stashed pgtable_t contains the hpte bits.
+ * [ secondary group | 3 bit hidx | valid ]. We use one byte per each HPTE entry.
+ * With 16MB huge page and 64K HPTE we need 256 entries and with 4K HPTE we need
+ * 4096 entries. Both will fit in a 4K pgtable_t.
+ */
+int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
+ pmd_t *pmdp, unsigned long trap, int local, int ssize,
+ unsigned int psize)
+{
+ unsigned int index, valid;
+ unsigned char *hpte_slot_array;
+ unsigned long rflags, pa, hidx;
+ unsigned long old_pmd, new_pmd;
+ int ret, lpsize = MMU_PAGE_16M;
+ unsigned long vpn, hash, shift, slot;
+
+ /*
+ * atomically mark the linux large page PMD busy and dirty
+ */
+ do {
+ old_pmd = pmd_val(*pmdp);
+ /* If PMD busy, retry the access */
+ if (unlikely(old_pmd & PMD_HUGE_BUSY))
+ return 0;
+ /* If PMD permissions don't match, take page fault */
+ if (unlikely(access & ~old_pmd))
+ return 1;
+ /*
+ * Try to lock the PTE, add ACCESSED and DIRTY if it was
+ * a write access
+ */
+ new_pmd = old_pmd | PMD_HUGE_BUSY | PMD_HUGE_ACCESSED;
+ if (access & _PAGE_RW)
+ new_pmd |= PMD_HUGE_DIRTY;
+ } while (old_pmd != __cmpxchg_u64((unsigned long *)pmdp,
+ old_pmd, new_pmd));
+ /*
+ * derive the rflags. Default enable read (0x2)
+ */
+ rflags = 0x2 | (!(new_pmd & PMD_HUGE_RW));
+ /* PMD_HUGE_EXEC -> HW_NO_EXEC since it's inverted */
+ rflags |= ((new_pmd & PMD_HUGE_EXEC) ? 0 : HPTE_R_N);
+
+#if 0 /* FIXME!! */
+ if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) {
+
+ /*
+ * No CPU has hugepages but lacks no execute, so we
+ * don't need to worry about that case
+ */
+ rflags = hash_page_do_lazy_icache(rflags, __pte(old_pte), trap);
+ }
+#endif
+ /*
+ * Find the slot index details for this ea, using base page size.
+ */
+ shift = mmu_psize_defs[psize].shift;
+ index = (ea & (HUGE_PAGE_SIZE - 1)) >> shift;
+ BUG_ON(index > 4096);
+
+ vpn = hpt_vpn(ea, vsid, ssize);
+ hash = hpt_hash(vpn, shift, ssize);
+ /*
+ * The hpte hindex are stored in the pgtable whose address is in the
+ * second half of the PMD
+ */
+ hpte_slot_array = *(char **)(pmdp + PTRS_PER_PMD);
+
+ valid = hpte_slot_array[index] & 0x1;
+ if (unlikely(valid)) {
+ /* update the hpte bits */
+ hidx = hpte_slot_array[index] >> 1;
+ if (hidx & _PTEIDX_SECONDARY)
+ hash = ~hash;
+ slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+ slot += hidx & _PTEIDX_GROUP_IX;
+
+ ret = ppc_md.hpte_updatepp(slot, rflags, vpn,
+ psize, ssize, local);
+ /*
+ * We failed to update, try to insert a new entry.
+ */
+ if (ret == -1) {
+ /*
+ * large pte is marked busy, so we can be sure
+ * nobody is looking at hpte_slot_array. hence we can
+ * safely update this here.
+ */
+ hpte_slot_array[index] = 0;
+ valid = 0;
+ }
+ }
+
+ if (likely(!valid)) {
+ unsigned long hpte_group;
+
+ /* insert new entry */
+ pa = pmd_pfn(__pmd(old_pmd)) << PAGE_SHIFT;
+repeat:
+ hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;
+
+ /* clear the busy bits and set the hash pte bits */
+ new_pmd = (new_pmd & ~PMD_HUGE_HPTEFLAGS) | PMD_HUGE_HASHPTE;
+
+#if 0
+ /* Add in WIMG bits. FIXME!! enabled by default */
+ rflags |= (new_pmd & (_PAGE_WRITETHRU | _PAGE_NO_CACHE |
+ _PAGE_COHERENT | _PAGE_GUARDED));
+#endif
+ /* Insert into the hash table, primary slot */
+ slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, 0,
+ psize, lpsize, ssize);
+ /*
+ * Primary is full, try the secondary
+ */
+ if (unlikely(slot == -1)) {
+ hpte_group = ((~hash & htab_hash_mask) *
+ HPTES_PER_GROUP) & ~0x7UL;
+ slot = ppc_md.hpte_insert(hpte_group, vpn, pa,
+ rflags, HPTE_V_SECONDARY,
+ psize, lpsize, ssize);
+ if (slot == -1) {
+ if (mftb() & 0x1)
+ hpte_group = ((hash & htab_hash_mask) *
+ HPTES_PER_GROUP) & ~0x7UL;
+
+ ppc_md.hpte_remove(hpte_group);
+ goto repeat;
+ }
+ }
+ /*
+ * Hypervisor failure. Restore old pmd and return -1
+ * similar to __hash_page_*
+ */
+ if (unlikely(slot == -2)) {
+ *pmdp = __pmd(old_pmd);
+ hash_failure_debug(ea, access, vsid, trap, ssize,
+ psize, lpsize, old_pmd);
+ return -1;
+ }
+ /*
+ * large pte is marked busy, so we can be sure
+ * nobody is looking at hpte_slot_array. hence we can
+ * safely update this here.
+ */
+ hpte_slot_array[index] = slot << 1 | 0x1;
+ }
+ /*
+ * No need to use ldarx/stdcx here
+ */
+ *pmdp = __pmd(new_pmd & ~PMD_HUGE_BUSY);
+ return 0;
+}
@@ -580,3 +580,37 @@ struct page *pmd_page(pmd_t pmd)
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
+/*
+ * find_linux_pte returns the address of a linux pte for a given
+ * effective address and directory. If not found, it returns zero.
+ */
+pte_t *find_linux_pte(pgd_t *pgdir, unsigned long ea, unsigned int *thp)
+{
+ pgd_t *pg;
+ pud_t *pu;
+ pmd_t *pm;
+ pte_t *pt = NULL;
+
+ if (thp)
+ *thp = 0;
+ pg = pgdir + pgd_index(ea);
+ if (!pgd_none(*pg)) {
+ pu = pud_offset(pg, ea);
+ if (!pud_none(*pu)) {
+ pm = pmd_offset(pu, ea);
+ if (pmd_large(*pm)) {
+ /* THP page */
+ if (thp)
+ *thp = 1;
+ /*
+ * This should be ok, except for few flags
+ * most of the pte, large page pmd bits map
+ */
+ return (pte_t *)pm;
+ } else if (pmd_present(*pm))
+ pt = pte_offset_kernel(pm, ea);
+ }
+ }
+ return pt;
+}
@@ -206,7 +206,7 @@ void __flush_hash_table_range(struct mm_struct *mm, unsigned long start,
local_irq_save(flags);
arch_enter_lazy_mmu_mode();
for (; start < end; start += PAGE_SIZE) {
- pte_t *ptep = find_linux_pte(mm->pgd, start);
+ pte_t *ptep = find_linux_pte(mm->pgd, start, NULL);
unsigned long pte;
if (ptep == NULL)
@@ -125,7 +125,7 @@ static int read_user_stack_slow(void __user *ptr, void *ret, int nb)
if (!pgdir)
return -EFAULT;
- ptep = find_linux_pte_or_hugepte(pgdir, addr, &shift);
+ ptep = find_linux_pte_or_hugepte(pgdir, addr, &shift, NULL);
if (!shift)
shift = PAGE_SHIFT;
@@ -261,7 +261,7 @@ static inline unsigned long eeh_token_to_phys(unsigned long token)
pte_t *ptep;
unsigned long pa;
- ptep = find_linux_pte(init_mm.pgd, token);
+ ptep = find_linux_pte(init_mm.pgd, token, NULL);
if (!ptep)
return token;
pa = pte_pfn(*ptep) << PAGE_SHIFT;