@@ -231,6 +231,10 @@ typedef struct {
u64 high_slices_psize; /* 4 bits per slice for now */
u16 user_psize; /* page size index */
#endif
+#ifdef CONFIG_PPC_64K_PAGES
+ /* for 4K PTE fragment support */
+ struct page *pgtable_page;
+#endif
} mm_context_t;
/* Page size definitions, common between 32 and 64-bit
@@ -498,6 +498,10 @@ typedef struct {
unsigned long acop; /* mask of enabled coprocessor types */
unsigned int cop_pid; /* pid value used with coprocessors */
#endif /* CONFIG_PPC_ICSWX */
+#ifdef CONFIG_PPC_64K_PAGES
+ /* for 4K PTE fragment support */
+ struct page *pgtable_page;
+#endif
} mm_context_t;
@@ -378,7 +378,11 @@ void arch_free_page(struct page *page, int order);
struct vm_area_struct;
+#ifdef CONFIG_PPC_64K_PAGES
+typedef pte_t *pgtable_t;
+#else
typedef struct page *pgtable_t;
+#endif
#include <asm-generic/memory_model.h>
#endif /* __ASSEMBLY__ */
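
With CONFIG_PPC_64K_PAGES, a PTE table becomes a sub-page fragment, so pgtable_t, the opaque handle that generic mm code passes around, is now the fragment's kernel-virtual address rather than a struct page pointer. A minimal sketch of what the two definitions mean for a consumer (the helper name is illustrative, not part of the patch):

	/* illustrative only: obtaining a usable pte_t * from a pgtable_t */
	#ifdef CONFIG_PPC_64K_PAGES
	static inline pte_t *pgtable_ptes(pgtable_t t)
	{
		return t;			/* already the fragment address */
	}
	#else
	static inline pte_t *pgtable_ptes(pgtable_t t)
	{
		return (pte_t *)page_address(t);	/* t is a struct page * */
	}
	#endif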
@@ -150,6 +150,13 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
#else /* if CONFIG_PPC_64K_PAGES */
+extern pte_t *page_table_alloc(struct mm_struct *, unsigned long, int);
+extern void page_table_free(struct mm_struct *, unsigned long *, int);
+extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift);
+#ifdef CONFIG_SMP
+extern void __tlb_remove_table(void *_table);
+#endif
+
#define pud_populate(mm, pud, pmd) pud_set(pud, (unsigned long)pmd)
static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
@@ -161,90 +168,42 @@ static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
pgtable_t pte_page)
{
- pmd_populate_kernel(mm, pmd, page_address(pte_page));
+ pmd_set(pmd, (unsigned long)pte_page);
}
static inline pgtable_t pmd_pgtable(pmd_t pmd)
{
- return pmd_page(pmd);
+ return (pgtable_t)(pmd_val(pmd) & -sizeof(pte_t)*PTRS_PER_PTE);
}
static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
unsigned long address)
{
- return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO);
+ return (pte_t *)page_table_alloc(mm, address, 1);
}
static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
- unsigned long address)
+ unsigned long address)
{
- struct page *page;
- pte_t *pte;
-
- pte = pte_alloc_one_kernel(mm, address);
- if (!pte)
- return NULL;
- page = virt_to_page(pte);
- pgtable_page_ctor(page);
- return page;
+ return (pgtable_t)page_table_alloc(mm, address, 0);
}
static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
{
- free_page((unsigned long)pte);
+ page_table_free(mm, (unsigned long *)pte, 1);
}
static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
{
- pgtable_page_dtor(ptepage);
- __free_page(ptepage);
-}
-
-static inline void pgtable_free(void *table, unsigned index_size)
-{
- if (!index_size)
- free_page((unsigned long)table);
- else {
- BUG_ON(index_size > MAX_PGTABLE_INDEX_SIZE);
- kmem_cache_free(PGT_CACHE(index_size), table);
- }
+ page_table_free(mm, (unsigned long *)ptepage, 0);
}
-#ifdef CONFIG_SMP
-static inline void pgtable_free_tlb(struct mmu_gather *tlb,
- void *table, int shift)
-{
- unsigned long pgf = (unsigned long)table;
- BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
- pgf |= shift;
- tlb_remove_table(tlb, (void *)pgf);
-}
-
-static inline void __tlb_remove_table(void *_table)
-{
- void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE);
- unsigned shift = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE;
-
- pgtable_free(table, shift);
-}
-#else /* !CONFIG_SMP */
-static inline void pgtable_free_tlb(struct mmu_gather *tlb,
- void *table, int shift)
-{
- pgtable_free(table, shift);
-}
-#endif /* CONFIG_SMP */
-
static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
unsigned long address)
{
- struct page *page = page_address(table);
-
tlb_flush_pgtable(tlb, address);
- pgtable_page_dtor(page);
- pgtable_free_tlb(tlb, page, 0);
+ pgtable_free_tlb(tlb, table, 0);
}
-
#endif /* CONFIG_PPC_64K_PAGES */
static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
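
The new pmd_pgtable() recovers the fragment address by masking the pmd value: for an unsigned x, -x equals ~(x - 1), so ANDing with -(sizeof(pte_t) * PTRS_PER_PTE) strips the low flag bits down to the natural alignment of a PTE table. A self-contained illustration of that identity, with the table geometry assumed for the demo rather than taken from the patch:

	#include <assert.h>
	#include <stdint.h>

	/* assumed demo geometry: 256 PTEs of 8 bytes, i.e. 2K-aligned tables */
	#define PTE_TABLE_BYTES ((uint64_t)256 * 8)

	int main(void)
	{
		/* for unsigned x, -x == ~(x - 1): a round-down mask */
		assert(-PTE_TABLE_BYTES == ~(PTE_TABLE_BYTES - 1));

		uint64_t pmd_val = 0x10003a08;	/* table address plus low flag bits */
		assert((pmd_val & -PTE_TABLE_BYTES) == 0x10003800);
		return 0;
	}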
@@ -258,7 +217,6 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
kmem_cache_free(PGT_CACHE(PMD_INDEX_SIZE), pmd);
}
-
#define __pmd_free_tlb(tlb, pmd, addr) \
pgtable_free_tlb(tlb, pmd, PMD_INDEX_SIZE)
#ifndef CONFIG_PPC_64K_PAGES
@@ -575,7 +575,9 @@ void __init setup_arch(char **cmdline_p)
init_mm.end_code = (unsigned long) _etext;
init_mm.end_data = (unsigned long) _edata;
init_mm.brk = klimit;
-
+#ifdef CONFIG_PPC_64K_PAGES
+ init_mm.context.pgtable_page = NULL;
+#endif
irqstack_early_init();
exc_lvl_early_init();
emergency_stack_init();
@@ -86,6 +86,9 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
spin_lock_init(mm->context.cop_lockp);
#endif /* CONFIG_PPC_ICSWX */
+#ifdef CONFIG_PPC_64K_PAGES
+ mm->context.pgtable_page = NULL;
+#endif
return 0;
}
@@ -97,13 +100,45 @@ void __destroy_context(int context_id)
}
EXPORT_SYMBOL_GPL(__destroy_context);
+#ifdef CONFIG_PPC_64K_PAGES
+static void destroy_pagetable_page(struct mm_struct *mm)
+{
+ int count;
+ struct page *page;
+
+ page = mm->context.pgtable_page;
+ if (!page)
+ return;
+
+ /* drop all the pending references */
+ count = atomic_read(&page->_mapcount) + 1;
+ /* We allow PTE_FRAG_NR(16) fragments from a PTE page */
+ count = atomic_sub_return(16 - count, &page->_count);
+ if (!count) {
+ pgtable_page_dtor(page);
+ reset_page_mapcount(page);
+ free_hot_cold_page(page, 0);
+ }
+}
+
+#else
+static inline void destroy_pagetable_page(struct mm_struct *mm)
+{
+ return;
+}
+#endif
+
void destroy_context(struct mm_struct *mm)
{
#ifdef CONFIG_PPC_ICSWX
drop_cop(mm->context.acop, mm);
kfree(mm->context.cop_lockp);
mm->context.cop_lockp = NULL;
#endif /* CONFIG_PPC_ICSWX */
+
+ destroy_pagetable_page(mm);
__destroy_context(mm->context.id);
subpage_prot_free(mm);
mm->context.id = MMU_NO_CONTEXT;
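
The reference arithmetic in destroy_pagetable_page() deserves a note: page->_count starts at PTE_FRAG_NR (the literal 16 mirrors that constant, which lives in pgtable_64.c and is not visible in this file), every fragment owns one reference whether or not it was ever handed out, and _mapcount records the last index handed out. So _mapcount + 1 fragments were given away, and the subtraction drops the references of the fragments nobody claimed. A toy model of that bookkeeping with plain ints (all names here are for the sketch only):

	#include <assert.h>

	#define PTE_FRAG_NR 16

	int main(void)
	{
		int count = PTE_FRAG_NR;	/* models page->_count */
		int mapcount = 0;		/* models page->_mapcount */

		mapcount = 2;	/* fragments 1 and 2 handed out after fragment 0 */
		count--;	/* one live fragment freed via page_table_free() */

		int handed_out = mapcount + 1;		/* 3 */
		count -= PTE_FRAG_NR - handed_out;	/* drop 13 unclaimed refs */

		/* two fragments are still in use, so the page must survive */
		assert(count == 2);
		return 0;
	}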
@@ -337,3 +337,147 @@ EXPORT_SYMBOL(__ioremap_at);
EXPORT_SYMBOL(iounmap);
EXPORT_SYMBOL(__iounmap);
EXPORT_SYMBOL(__iounmap_at);
+
+#ifdef CONFIG_PPC_64K_PAGES
+/*
+ * We support 16 fragments per PTE page. This is limited by how many
+ * bits we can pack in page->_mapcount. We use the first half to
+ * track usage for the RCU page-table free.
+ */
+#define PTE_FRAG_NR 16
+/*
+ * We use a 2K PTE page fragment and another 2K for storing
+ * real_pte_t hash index
+ */
+#define PTE_FRAG_SIZE (2 * PTRS_PER_PTE * sizeof(pte_t))
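+/*
+ * All PTE_FRAG_NR fragments must fit within a single 64K page, since
+ * __get_from_cache() carves them out by index arithmetic.
+ */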
+
+static pte_t *__get_from_cache(struct mm_struct *mm)
+{
+ int index;
+ pte_t *ret = NULL;
+ struct page *page;
+
+ page = mm->context.pgtable_page;
+ if (page) {
+ void *p = page_address(page);
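+		/* _mapcount holds the index of the last fragment handed out */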
+ index = atomic_add_return(1, &page->_mapcount);
+ ret = (pte_t *) (p + (index * PTE_FRAG_SIZE));
+ /*
+		 * If we have taken up all the fragments, mark the PTE page NULL
+ */
+ if (index == PTE_FRAG_NR - 1)
+ mm->context.pgtable_page = NULL;
+ }
+ return ret;
+}
+
+static pte_t *get_from_cache(struct mm_struct *mm)
+{
+ pte_t *ret = NULL;
+
+ spin_lock(&mm->page_table_lock);
+ ret = __get_from_cache(mm);
+ spin_unlock(&mm->page_table_lock);
+
+ return ret;
+}
+
+static pte_t *__alloc_for_cache(struct mm_struct *mm, int kernel)
+{
+ pte_t *ret = NULL;
+ struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK |
+ __GFP_REPEAT | __GFP_ZERO);
+
+ if (page) {
+ spin_lock(&mm->page_table_lock);
+ if (likely(!mm->context.pgtable_page)) {
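+			/*
+			 * The caller takes fragment 0; _count starts at
+			 * PTE_FRAG_NR so that every fragment, handed out
+			 * or not, owns one page reference.
+			 */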
+ atomic_set(&page->_count, PTE_FRAG_NR);
+ atomic_set(&page->_mapcount, 0);
+ mm->context.pgtable_page = page;
+			ret = (pte_t *)page_address(page);
+ if (!kernel)
+ pgtable_page_ctor(page);
+ page = NULL;
+		} else {
+			ret = __get_from_cache(mm);
+		}
+ spin_unlock(&mm->page_table_lock);
+ }
+
+ if (page)
+ free_hot_cold_page(page, 0);
+ return ret;
+}
+
+pte_t *page_table_alloc(struct mm_struct *mm, unsigned long vmaddr, int kernel)
+{
+ pte_t *pte;
+
+ pte = get_from_cache(mm);
+ if (pte)
+ return pte;
+
+ return __alloc_for_cache(mm, kernel);
+}
+
+void page_table_free(struct mm_struct *mm, unsigned long *table, int kernel)
+{
+ struct page *page = virt_to_page(table);
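+	/* drop this fragment's reference; the last put frees the page */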
+ if (put_page_testzero(page)) {
+ if (!kernel)
+ pgtable_page_dtor(page);
+ reset_page_mapcount(page);
+ free_hot_cold_page(page, 0);
+ }
+}
+
+#ifdef CONFIG_SMP
+static void page_table_free_rcu(void *table)
+{
+ struct page *page = virt_to_page(table);
+ if (put_page_testzero(page)) {
+ pgtable_page_dtor(page);
+ reset_page_mapcount(page);
+ free_hot_cold_page(page, 0);
+ }
+}
+
+void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
+{
+ unsigned long pgf = (unsigned long)table;
+
+ BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
+ pgf |= shift;
+ tlb_remove_table(tlb, (void *)pgf);
+}
+
+void __tlb_remove_table(void *_table)
+{
+ void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE);
+ unsigned shift = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE;
+
+ if (!shift)
+ /* PTE page needs special handling */
+ page_table_free_rcu(table);
+ else {
+ BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
+ kmem_cache_free(PGT_CACHE(shift), table);
+ }
+}
+#else
+void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
+{
+ if (!shift) {
+ /* PTE page needs special handling */
+ struct page *page = virt_to_page(table);
+ if (put_page_testzero(page)) {
+ pgtable_page_dtor(page);
+ reset_page_mapcount(page);
+ free_hot_cold_page(page, 0);
+ }
+ } else {
+ BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
+ kmem_cache_free(PGT_CACHE(shift), table);
+ }
+}
+#endif
+#endif /* CONFIG_PPC_64K_PAGES */
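
Finally, pgtable_free_tlb() and __tlb_remove_table() pass two values through tlb_remove_table()'s single void * by tagging the index size into the low bits of the table address, which is free because page tables are aligned well beyond MAX_PGTABLE_INDEX_SIZE. A standalone sketch of the trick, with the mask value assumed to be 0xf for the demo:

	#include <assert.h>
	#include <stdint.h>
	#include <stdlib.h>

	#define MAX_PGTABLE_INDEX_SIZE 0xf	/* assumed for the demo */

	int main(void)
	{
		/* page tables are page-aligned, so the low bits are free */
		void *table = aligned_alloc(4096, 4096);
		unsigned int shift = 9;

		uintptr_t pgf = (uintptr_t)table | shift;	/* pack */

		void *t = (void *)(pgf & ~(uintptr_t)MAX_PGTABLE_INDEX_SIZE);
		unsigned int s = pgf & MAX_PGTABLE_INDEX_SIZE;	/* unpack */

		assert(t == table && s == shift);
		free(table);
		return 0;
	}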