@@ -29,6 +29,7 @@
#include <asm/page.h>
#include <asm/accounting.h>
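+/* Size of the per-thread SLB preload cache */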
+#define SLB_PRELOAD_NR 8U
/*
* low level task data.
*/
@@ -44,6 +45,9 @@ struct thread_info {
#if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(CONFIG_PPC32)
struct cpu_accounting_data accounting;
#endif
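+ /* Cache of user EAs whose segments are preloaded into the SLB */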
+ unsigned int slb_preload_nr;
+ unsigned long slb_preload_ea[SLB_PRELOAD_NR];
+
/* low level flags - has atomic operations done on it */
unsigned long flags ____cacheline_aligned_in_smp;
};
@@ -1710,6 +1710,8 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
return 0;
}
+void preload_new_slb_context(unsigned long start, unsigned long sp);
+
/*
* Set up a thread for executing a new program
*/
@@ -1717,6 +1719,10 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
{
#ifdef CONFIG_PPC64
unsigned long load_addr = regs->gpr[2]; /* saved by ELF_PLAT_INIT */
+
+#ifdef CONFIG_PPC_BOOK3S_64
+ preload_new_slb_context(start, sp);
+#endif
#endif
/*
@@ -85,7 +85,9 @@ int hash__alloc_context_id(void)
}
EXPORT_SYMBOL_GPL(hash__alloc_context_id);
-static int hash__init_new_context(struct mm_struct *mm)
+void init_new_slb_context(struct task_struct *tsk, struct mm_struct *mm);
+
+static int hash__init_new_context(struct task_struct *tsk, struct mm_struct *mm)
{
int index;
@@ -107,8 +109,10 @@ static int hash__init_new_context(struct mm_struct *mm)
* We should not be calling init_new_context() on init_mm. Hence a
* check against 0 is OK.
*/
- if (mm->context.id == 0)
+ if (mm->context.id == 0) {
slice_init_new_context_exec(mm);
+ init_new_slb_context(tsk, mm);
+ }
subpage_prot_init_new_context(mm);
@@ -152,7 +156,7 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
if (radix_enabled())
index = radix__init_new_context(mm);
else
- index = hash__init_new_context(mm);
+ index = hash__init_new_context(tsk, mm);
if (index < 0)
return index;
@@ -216,14 +216,85 @@ static inline int esids_match(unsigned long addr1, unsigned long addr2)
return (GET_ESID_1T(addr1) == GET_ESID_1T(addr2));
}
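+/* Return true if @ea's segment already has an entry in the preload cache */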
+static bool preload_hit(struct thread_info *ti, unsigned long ea)
+{
+ int i;
+
+ for (i = 0; i < min(SLB_PRELOAD_NR, ti->slb_preload_nr); i++)
+ if (esids_match(ti->slb_preload_ea[i], ea))
+ return true;
+ return false;
+}
+
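+/*
+ * Record @ea in the preload cache unless its segment is already present.
+ * Entries are written round-robin, so once the cache is full the oldest
+ * entry is overwritten. Returns true if a new entry was added.
+ */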
+static bool preload_add(struct thread_info *ti, unsigned long ea)
+{
+ if (preload_hit(ti, ea))
+ return false;
+
+ ti->slb_preload_ea[ti->slb_preload_nr % SLB_PRELOAD_NR] = ea;
+ ti->slb_preload_nr++;
+
+ return true;
+}
+
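+/*
+ * Called from start_thread() at exec time to preload the segments a new
+ * program touches first: its entry point, initial stack and heap.
+ */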
+void preload_new_slb_context(unsigned long start, unsigned long sp)
+{
+ struct thread_info *ti = current_thread_info();
+ struct mm_struct *mm = current->mm;
+ unsigned long heap = mm->start_brk;
+
+ if (!is_kernel_addr(start)) {
+ if (preload_add(ti, start))
+ slb_allocate_user(mm, start);
+ }
+
+ if (!is_kernel_addr(sp)) {
+ if (preload_add(ti, sp))
+ slb_allocate_user(mm, sp);
+ }
+
+ if (heap && !is_kernel_addr(heap)) {
+ if (preload_add(ti, heap))
+ slb_allocate_user(mm, heap);
+ }
+
+ /* Preloading mm->mmap_base at this point is already too late. */
+}
+
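+/* Reset and reseed the preload cache when a new user context is set up */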
+void init_new_slb_context(struct task_struct *tsk, struct mm_struct *mm)
+{
+ struct thread_info *ti = task_thread_info(tsk);
+
+ /*
+ * Clear out the previous cache because the new exec will put addresses
+ * in different places. Preloading is still generally a win here because
+ * we don't have all of the ELF information yet and take several misses
+ * in kernel space on the user addresses while loading the binary and
+ * libraries. However, we don't want to insert more SLB entries than a
+ * small process needs. We should probably look at aging out the preload
+ * cache slowly at context switch time.
+ */
+ ti->slb_preload_nr = 0;
+
+ /*
+ * Preload some userspace segments into the SLB.
+ * Almost all 32-bit and 64-bit PowerPC executables are linked at
+ * 0x10000000, so it makes sense to preload this segment.
+ */
+ preload_add(ti, 0x10000000);
+}
+
/* Flush all user entries from the segment table of the current processor. */
void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
{
unsigned long offset;
unsigned long slbie_data = 0;
- unsigned long pc = KSTK_EIP(tsk);
- unsigned long stack = KSTK_ESP(tsk);
- unsigned long exec_base;
+ struct thread_info *ti = task_thread_info(tsk);
+ int i;
/*
* We need interrupts hard-disabled here, not just soft-disabled,
@@ -269,25 +340,12 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
}
get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap;
- /*
- * preload some userspace segments into the SLB.
- * Almost all 32 and 64bit PowerPC executables are linked at
- * 0x10000000 so it makes sense to preload this segment.
+ /* XXX: should we gradually age out SLBs after a number of context
+ * switches to reduce reload overhead of unused entries (like we do
+ * with FP/VEC reload)?
*/
- exec_base = 0x10000000;
-
- if (is_kernel_addr(pc) || is_kernel_addr(stack) ||
- is_kernel_addr(exec_base))
- return;
-
- slb_allocate_user(mm, pc);
-
- if (!esids_match(pc, stack))
- slb_allocate_user(mm, stack);
-
- if (!esids_match(pc, exec_base) &&
- !esids_match(stack, exec_base))
- slb_allocate_user(mm, exec_base);
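+ /* For now, preload every cached user segment for the incoming task */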
+ for (i = 0; i < min(SLB_PRELOAD_NR, ti->slb_preload_nr); i++)
+ slb_allocate_user(mm, ti->slb_preload_ea[i]);
}
void slb_set_size(u16 size)
@@ -536,11 +594,16 @@ long do_slb_fault(struct pt_regs *regs, unsigned long ea)
return slb_allocate_kernel(ea, id);
} else {
struct mm_struct *mm = current->mm;
+ long err;
if (unlikely(!mm))
return -EFAULT;
- return slb_allocate_user(mm, ea);
+ err = slb_allocate_user(mm, ea);
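+ /* Remember this EA so switch_slb() will preload it next time */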
+ if (!err)
+ preload_add(current_thread_info(), ea);
+
+ return err;
}
}