Message ID: 20240418092327.860135-13-benjamin@sipsolutions.net
State: Superseded
Series: Rework stub syscall and page table handling
On 4/18/24 5:23 PM, benjamin@sipsolutions.net wrote:
> diff --git a/arch/um/include/asm/mmu.h b/arch/um/include/asm/mmu.h
> index 37eb6e89e79a..bf8da736609c 100644
> --- a/arch/um/include/asm/mmu.h
> +++ b/arch/um/include/asm/mmu.h
> @@ -10,6 +10,10 @@
>
>  typedef struct mm_context {
>          struct mm_id id;
> +
> +        /* Address range in need of a TLB sync */
> +        long int sync_tlb_range_from;
> +        long int sync_tlb_range_to;

Why not "unsigned long"?

>  } mm_context_t;
>
>  extern void __switch_mm(struct mm_id * mm_idp);
> diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h
> index e1ece21dbe3f..5bb397b65efb 100644
> --- a/arch/um/include/asm/pgtable.h
> +++ b/arch/um/include/asm/pgtable.h
> @@ -244,6 +244,38 @@ static inline void set_pte(pte_t *pteptr, pte_t pteval)
>
>  #define PFN_PTE_SHIFT        PAGE_SHIFT
>
> +static inline void um_tlb_mark_sync(struct mm_struct *mm, unsigned long start,
> +                                    unsigned long end)
> +{
> +        if (!mm->context.sync_tlb_range_to) {
> +                mm->context.sync_tlb_range_from = start;
> +                mm->context.sync_tlb_range_to = end;
> +        } else {
> +                if (start < mm->context.sync_tlb_range_from)
> +                        mm->context.sync_tlb_range_from = start;
> +                if (end > mm->context.sync_tlb_range_to)
> +                        mm->context.sync_tlb_range_to = end;
> +        }
> +}

IIUC, in some cases, the range [sync_tlb_range_from, sync_tlb_range_to)
might become very large when merging non-adjacent ranges? Could that
be an issue?

> diff --git a/arch/um/include/asm/tlbflush.h b/arch/um/include/asm/tlbflush.h
> index d7cf82023b74..62816f6f1c91 100644
> --- a/arch/um/include/asm/tlbflush.h
> +++ b/arch/um/include/asm/tlbflush.h
> @@ -9,24 +9,50 @@
>  #include <linux/mm.h>
>
>  /*
> - * TLB flushing:
> + * In UML, we need to sync the TLB over by using mmap/munmap/mprotect syscalls
> + * from the process handling the MM (which can be the kernel itself).
> + *
> + * To track updates, we can hook into set_ptes and flush_tlb_*. With set_ptes
> + * we catch all PTE transitions where memory that was unusable becomes usable.
> + * While with flush_tlb_* we can track any memory that becomes unusable and
> + * even if a higher layer of the page table was modified.
> + *
> + * So, we simply track updates using both methods and mark the memory area to
> + * be synced later on. The only special case is that flush_tlb_kern_* needs to
> + * be executed immediately as there is no good synchronization point in that
> + * case. In contrast, in the set_ptes case we can wait for the next kernel
> + * segfault before we do the synchronization.
>   *
> - * - flush_tlb() flushes the current mm struct TLBs
>   * - flush_tlb_all() flushes all processes TLBs
>   * - flush_tlb_mm(mm) flushes the specified mm context TLB's
>   * - flush_tlb_page(vma, vmaddr) flushes one page
> - * - flush_tlb_kernel_vm() flushes the kernel vm area
>   * - flush_tlb_range(vma, start, end) flushes a range of pages
> + * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
>   */
>
> +extern int um_tlb_sync(struct mm_struct *mm);
> +
>  extern void flush_tlb_all(void);
>  extern void flush_tlb_mm(struct mm_struct *mm);
> -extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
> -                            unsigned long end);
> -extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long address);
> -extern void flush_tlb_kernel_vm(void);
> -extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
> -extern void __flush_tlb_one(unsigned long addr);
> +
> +static void flush_tlb_page(struct vm_area_struct *vma, unsigned long address)
> +{
> +        um_tlb_mark_sync(vma->vm_mm, address, address + PAGE_SIZE);
> +}
> +
> +static void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
> +                            unsigned long end)
> +{
> +        um_tlb_mark_sync(vma->vm_mm, start, end);
> +}
> +
> +static void flush_tlb_kernel_range(unsigned long start, unsigned long end)
> +{
> +        um_tlb_mark_sync(&init_mm, start, end);
> +
> +        /* Kernel needs to be synced immediately */
> +        um_tlb_sync(&init_mm);
> +}

Nit: this is a header file, these functions should be defined as inline
functions.

> diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
> index c137ff6f84dd..232aa7601d5d 100644
> --- a/arch/um/kernel/tlb.c
> +++ b/arch/um/kernel/tlb.c
[...]
> 
> -void flush_tlb_kernel_range(unsigned long start, unsigned long end)
> -{
> -        flush_tlb_kernel_range_common(start, end);
> -}
> -
> -void flush_tlb_kernel_vm(void)
> -{
> -        flush_tlb_kernel_range_common(start_vm, end_vm);
> -}

The build breaks with this change, as there is still a call to
flush_tlb_kernel_vm() in ubd.

https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/um/drivers/ubd_kern.c?id=fb5d1d389c9e78d68f1f71f926d6251017579f5b#n774

Regards,
Tiwei
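Tiwei's concern is easiest to see concretely: um_tlb_mark_sync() keeps
just one pending interval per mm, so two small marks at opposite ends of
the address space coalesce into an interval covering everything in
between. A minimal user-space sketch of that merge behaviour (the struct
and test harness are stand-ins for illustration; only the merge logic
mirrors the patch):

#include <assert.h>

/* Stand-ins for the fields the patch adds to mm_context_t */
struct ctx {
        unsigned long sync_tlb_range_from;
        unsigned long sync_tlb_range_to;
};

/* Merge logic as in um_tlb_mark_sync(); to == 0 doubles as "nothing pending" */
static void mark_sync(struct ctx *c, unsigned long start, unsigned long end)
{
        if (!c->sync_tlb_range_to) {
                c->sync_tlb_range_from = start;
                c->sync_tlb_range_to = end;
        } else {
                if (start < c->sync_tlb_range_from)
                        c->sync_tlb_range_from = start;
                if (end > c->sync_tlb_range_to)
                        c->sync_tlb_range_to = end;
        }
}

int main(void)
{
        struct ctx c = { 0, 0 };

        mark_sync(&c, 0x1000, 0x2000);                     /* one page, low  */
        mark_sync(&c, 0x7f0000000000UL, 0x7f0000001000UL); /* one page, high */

        /* The single pending interval now spans everything in between */
        assert(c.sync_tlb_range_from == 0x1000);
        assert(c.sync_tlb_range_to == 0x7f0000001000UL);
        return 0;
}

Benjamin's reply below argues this is acceptable: the eventual sync only
walks the page table and skips entries that are not marked NEW_PAGE, so
an over-wide interval costs one scan, not extra syscalls.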
On Mon, 2024-04-22 at 10:51 +0800, Tiwei Bie wrote:
> On 4/18/24 5:23 PM, benjamin@sipsolutions.net wrote:
> > diff --git a/arch/um/include/asm/mmu.h b/arch/um/include/asm/mmu.h
> > index 37eb6e89e79a..bf8da736609c 100644
> > --- a/arch/um/include/asm/mmu.h
> > +++ b/arch/um/include/asm/mmu.h
> > @@ -10,6 +10,10 @@
> >
> >  typedef struct mm_context {
> >          struct mm_id id;
> > +
> > +        /* Address range in need of a TLB sync */
> > +        long int sync_tlb_range_from;
> > +        long int sync_tlb_range_to;
>
> Why not "unsigned long"?

Oops, yes, it should be "unsigned long".

> >  } mm_context_t;
> >
> >  extern void __switch_mm(struct mm_id * mm_idp);
> > diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h
> > index e1ece21dbe3f..5bb397b65efb 100644
> > --- a/arch/um/include/asm/pgtable.h
> > +++ b/arch/um/include/asm/pgtable.h
> > @@ -244,6 +244,38 @@ static inline void set_pte(pte_t *pteptr, pte_t pteval)
> >
> >  #define PFN_PTE_SHIFT        PAGE_SHIFT
> >
> > +static inline void um_tlb_mark_sync(struct mm_struct *mm, unsigned long start,
> > +                                    unsigned long end)
> > +{
> > +        if (!mm->context.sync_tlb_range_to) {
> > +                mm->context.sync_tlb_range_from = start;
> > +                mm->context.sync_tlb_range_to = end;
> > +        } else {
> > +                if (start < mm->context.sync_tlb_range_from)
> > +                        mm->context.sync_tlb_range_from = start;
> > +                if (end > mm->context.sync_tlb_range_to)
> > +                        mm->context.sync_tlb_range_to = end;
> > +        }
> > +}
>
> IIUC, in some cases, the range [sync_tlb_range_from, sync_tlb_range_to)
> might become very large when merging non-adjacent ranges? Could that
> be an issue?

I figured it is not a big problem. It will result in scanning the
entire page table once to check whether the NEW_PAGE bit is set on any
PTE. I am assuming that this will happen almost never and scanning the
page table (but not doing syscalls) is reasonably cheap in the end.

> > diff --git a/arch/um/include/asm/tlbflush.h b/arch/um/include/asm/tlbflush.h
> > index d7cf82023b74..62816f6f1c91 100644
> > --- a/arch/um/include/asm/tlbflush.h
> > +++ b/arch/um/include/asm/tlbflush.h
> > @@ -9,24 +9,50 @@
> >  #include <linux/mm.h>
> >
> >  /*
> > - * TLB flushing:
> > + * In UML, we need to sync the TLB over by using mmap/munmap/mprotect syscalls
> > + * from the process handling the MM (which can be the kernel itself).
> > + *
> > + * To track updates, we can hook into set_ptes and flush_tlb_*. With set_ptes
> > + * we catch all PTE transitions where memory that was unusable becomes usable.
> > + * While with flush_tlb_* we can track any memory that becomes unusable and
> > + * even if a higher layer of the page table was modified.
> > + *
> > + * So, we simply track updates using both methods and mark the memory area to
> > + * be synced later on. The only special case is that flush_tlb_kern_* needs to
> > + * be executed immediately as there is no good synchronization point in that
> > + * case. In contrast, in the set_ptes case we can wait for the next kernel
> > + * segfault before we do the synchronization.
> >   *
> > - * - flush_tlb() flushes the current mm struct TLBs
> >   * - flush_tlb_all() flushes all processes TLBs
> >   * - flush_tlb_mm(mm) flushes the specified mm context TLB's
> >   * - flush_tlb_page(vma, vmaddr) flushes one page
> > - * - flush_tlb_kernel_vm() flushes the kernel vm area
> >   * - flush_tlb_range(vma, start, end) flushes a range of pages
> > + * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
> >   */
> >
> > +extern int um_tlb_sync(struct mm_struct *mm);
> > +
> >  extern void flush_tlb_all(void);
> >  extern void flush_tlb_mm(struct mm_struct *mm);
> > -extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
> > -                            unsigned long end);
> > -extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long address);
> > -extern void flush_tlb_kernel_vm(void);
> > -extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
> > -extern void __flush_tlb_one(unsigned long addr);
> > +
> > +static void flush_tlb_page(struct vm_area_struct *vma, unsigned long address)
> > +{
> > +        um_tlb_mark_sync(vma->vm_mm, address, address + PAGE_SIZE);
> > +}
> > +
> > +static void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
> > +                            unsigned long end)
> > +{
> > +        um_tlb_mark_sync(vma->vm_mm, start, end);
> > +}
> > +
> > +static void flush_tlb_kernel_range(unsigned long start, unsigned long end)
> > +{
> > +        um_tlb_mark_sync(&init_mm, start, end);
> > +
> > +        /* Kernel needs to be synced immediately */
> > +        um_tlb_sync(&init_mm);
> > +}
>
> Nit: this is a header file, these functions should be defined as inline
> functions.

Yup, thanks!

> > diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
> > index c137ff6f84dd..232aa7601d5d 100644
> > --- a/arch/um/kernel/tlb.c
> > +++ b/arch/um/kernel/tlb.c
> [...]
> >
> > -void flush_tlb_kernel_range(unsigned long start, unsigned long end)
> > -{
> > -        flush_tlb_kernel_range_common(start, end);
> > -}
> > -
> > -void flush_tlb_kernel_vm(void)
> > -{
> > -        flush_tlb_kernel_range_common(start_vm, end_vm);
> > -}
>
> The build breaks with this change, as there is still a call to
> flush_tlb_kernel_vm() in ubd.
>
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/um/drivers/ubd_kern.c?id=fb5d1d389c9e78d68f1f71f926d6251017579f5b#n774

Oh, thanks for the pointer!

I do not see a good reason for that call to even exist. My best theory
right now is that it existed to avoid later pagefaults for new memory
regions (the vmalloc?). So a workaround that is not needed anymore with
this patch.

Benjamin
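The fix for the inline nit is mechanical; each helper simply gains the
inline qualifier. A sketch of what the corrected tlbflush.h helpers
would presumably look like in a respin (not taken from a posted version;
the surrounding declarations stay as in the patch):

/* Corrected header helpers: static inline avoids per-translation-unit
 * copies and the resulting "defined but not used" warnings. */
static inline void flush_tlb_page(struct vm_area_struct *vma,
                                  unsigned long address)
{
        um_tlb_mark_sync(vma->vm_mm, address, address + PAGE_SIZE);
}

static inline void flush_tlb_range(struct vm_area_struct *vma,
                                   unsigned long start, unsigned long end)
{
        um_tlb_mark_sync(vma->vm_mm, start, end);
}

static inline void flush_tlb_kernel_range(unsigned long start,
                                          unsigned long end)
{
        um_tlb_mark_sync(&init_mm, start, end);

        /* Kernel needs to be synced immediately */
        um_tlb_sync(&init_mm);
}

Without inline, every compilation unit that includes the header would
emit its own static copy of each function, warning about the ones it
never calls.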
On 22/04/2024 08:22, Benjamin Berg wrote:
> On Mon, 2024-04-22 at 10:51 +0800, Tiwei Bie wrote:
>> On 4/18/24 5:23 PM, benjamin@sipsolutions.net wrote:
>>> diff --git a/arch/um/include/asm/mmu.h b/arch/um/include/asm/mmu.h
>>> index 37eb6e89e79a..bf8da736609c 100644
>>> --- a/arch/um/include/asm/mmu.h
>>> +++ b/arch/um/include/asm/mmu.h
>>> @@ -10,6 +10,10 @@
>>>
>>>  typedef struct mm_context {
>>>          struct mm_id id;
>>> +
>>> +        /* Address range in need of a TLB sync */
>>> +        long int sync_tlb_range_from;
>>> +        long int sync_tlb_range_to;
>>
>> Why not "unsigned long"?
>
> Oops, yes, it should be "unsigned long".
>
>>>  } mm_context_t;
>>>
>>>  extern void __switch_mm(struct mm_id * mm_idp);
>>> diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h
>>> index e1ece21dbe3f..5bb397b65efb 100644
>>> --- a/arch/um/include/asm/pgtable.h
>>> +++ b/arch/um/include/asm/pgtable.h
>>> @@ -244,6 +244,38 @@ static inline void set_pte(pte_t *pteptr, pte_t pteval)
>>>
>>>  #define PFN_PTE_SHIFT        PAGE_SHIFT
>>>
>>> +static inline void um_tlb_mark_sync(struct mm_struct *mm, unsigned long start,
>>> +                                    unsigned long end)
>>> +{
>>> +        if (!mm->context.sync_tlb_range_to) {
>>> +                mm->context.sync_tlb_range_from = start;
>>> +                mm->context.sync_tlb_range_to = end;
>>> +        } else {
>>> +                if (start < mm->context.sync_tlb_range_from)
>>> +                        mm->context.sync_tlb_range_from = start;
>>> +                if (end > mm->context.sync_tlb_range_to)
>>> +                        mm->context.sync_tlb_range_to = end;
>>> +        }
>>> +}
>>
>> IIUC, in some cases, the range [sync_tlb_range_from, sync_tlb_range_to)
>> might become very large when merging non-adjacent ranges? Could that
>> be an issue?
>
> I figured it is not a big problem. It will result in scanning the
> entire page table once to check whether the NEW_PAGE bit is set on any
> PTE. I am assuming that this will happen almost never and scanning the
> page table (but not doing syscalls) is reasonably cheap in the end.
>
>>> diff --git a/arch/um/include/asm/tlbflush.h b/arch/um/include/asm/tlbflush.h
>>> index d7cf82023b74..62816f6f1c91 100644
>>> --- a/arch/um/include/asm/tlbflush.h
>>> +++ b/arch/um/include/asm/tlbflush.h
>>> @@ -9,24 +9,50 @@
>>>  #include <linux/mm.h>
>>>
>>>  /*
>>> - * TLB flushing:
>>> + * In UML, we need to sync the TLB over by using mmap/munmap/mprotect syscalls
>>> + * from the process handling the MM (which can be the kernel itself).
>>> + *
>>> + * To track updates, we can hook into set_ptes and flush_tlb_*. With set_ptes
>>> + * we catch all PTE transitions where memory that was unusable becomes usable.
>>> + * While with flush_tlb_* we can track any memory that becomes unusable and
>>> + * even if a higher layer of the page table was modified.
>>> + *
>>> + * So, we simply track updates using both methods and mark the memory area to
>>> + * be synced later on. The only special case is that flush_tlb_kern_* needs to
>>> + * be executed immediately as there is no good synchronization point in that
>>> + * case. In contrast, in the set_ptes case we can wait for the next kernel
>>> + * segfault before we do the synchronization.
>>>   *
>>> - * - flush_tlb() flushes the current mm struct TLBs
>>>   * - flush_tlb_all() flushes all processes TLBs
>>>   * - flush_tlb_mm(mm) flushes the specified mm context TLB's
>>>   * - flush_tlb_page(vma, vmaddr) flushes one page
>>> - * - flush_tlb_kernel_vm() flushes the kernel vm area
>>>   * - flush_tlb_range(vma, start, end) flushes a range of pages
>>> + * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
>>>   */
>>>
>>> +extern int um_tlb_sync(struct mm_struct *mm);
>>> +
>>>  extern void flush_tlb_all(void);
>>>  extern void flush_tlb_mm(struct mm_struct *mm);
>>> -extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
>>> -                            unsigned long end);
>>> -extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long address);
>>> -extern void flush_tlb_kernel_vm(void);
>>> -extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
>>> -extern void __flush_tlb_one(unsigned long addr);
>>> +
>>> +static void flush_tlb_page(struct vm_area_struct *vma, unsigned long address)
>>> +{
>>> +        um_tlb_mark_sync(vma->vm_mm, address, address + PAGE_SIZE);
>>> +}
>>> +
>>> +static void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
>>> +                            unsigned long end)
>>> +{
>>> +        um_tlb_mark_sync(vma->vm_mm, start, end);
>>> +}
>>> +
>>> +static void flush_tlb_kernel_range(unsigned long start, unsigned long end)
>>> +{
>>> +        um_tlb_mark_sync(&init_mm, start, end);
>>> +
>>> +        /* Kernel needs to be synced immediately */
>>> +        um_tlb_sync(&init_mm);
>>> +}
>>
>> Nit: this is a header file, these functions should be defined as inline functions.
>
> Yup, thanks!
>
>>> diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
>>> index c137ff6f84dd..232aa7601d5d 100644
>>> --- a/arch/um/kernel/tlb.c
>>> +++ b/arch/um/kernel/tlb.c
>> [...]
>>>
>>> -void flush_tlb_kernel_range(unsigned long start, unsigned long end)
>>> -{
>>> -        flush_tlb_kernel_range_common(start, end);
>>> -}
>>> -
>>> -void flush_tlb_kernel_vm(void)
>>> -{
>>> -        flush_tlb_kernel_range_common(start_vm, end_vm);
>>> -}
>>
>> The build breaks with this change, as there is still a call to
>> flush_tlb_kernel_vm() in ubd.
>>
>> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/um/drivers/ubd_kern.c?id=fb5d1d389c9e78d68f1f71f926d6251017579f5b#n774
>
> Oh, thanks for the pointer!
>
> I do not see a good reason for that call to even exist. My best theory
> right now is that it existed to avoid later pagefaults for new memory
> regions (the vmalloc?). So a workaround that is not needed anymore with
> this patch.

It is there since prehistoric times. No idea why and what it's doing.
IMHO it is not needed.

>
> Benjamin
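If the call really is a leftover, the companion fix on the ubd side
would presumably be a plain removal, along the lines of the following
sketch (hunk position elided and illustrative only; no such patch
appears in this thread):

--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ ... @@
-	flush_tlb_kernel_vm();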
diff --git a/arch/um/include/asm/mmu.h b/arch/um/include/asm/mmu.h
index 37eb6e89e79a..bf8da736609c 100644
--- a/arch/um/include/asm/mmu.h
+++ b/arch/um/include/asm/mmu.h
@@ -10,6 +10,10 @@
 
 typedef struct mm_context {
         struct mm_id id;
+
+        /* Address range in need of a TLB sync */
+        long int sync_tlb_range_from;
+        long int sync_tlb_range_to;
 } mm_context_t;
 
 extern void __switch_mm(struct mm_id * mm_idp);
diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h
index e1ece21dbe3f..5bb397b65efb 100644
--- a/arch/um/include/asm/pgtable.h
+++ b/arch/um/include/asm/pgtable.h
@@ -244,6 +244,38 @@ static inline void set_pte(pte_t *pteptr, pte_t pteval)
 
 #define PFN_PTE_SHIFT        PAGE_SHIFT
 
+static inline void um_tlb_mark_sync(struct mm_struct *mm, unsigned long start,
+                                    unsigned long end)
+{
+        if (!mm->context.sync_tlb_range_to) {
+                mm->context.sync_tlb_range_from = start;
+                mm->context.sync_tlb_range_to = end;
+        } else {
+                if (start < mm->context.sync_tlb_range_from)
+                        mm->context.sync_tlb_range_from = start;
+                if (end > mm->context.sync_tlb_range_to)
+                        mm->context.sync_tlb_range_to = end;
+        }
+}
+
+#define set_ptes set_ptes
+static inline void set_ptes(struct mm_struct *mm, unsigned long addr,
+                            pte_t *ptep, pte_t pte, int nr)
+{
+        /* Basically the default implementation */
+        size_t length = nr * PAGE_SIZE;
+
+        for (;;) {
+                set_pte(ptep, pte);
+                if (--nr == 0)
+                        break;
+                ptep++;
+                pte = __pte(pte_val(pte) + (1UL << PFN_PTE_SHIFT));
+        }
+
+        um_tlb_mark_sync(mm, addr, addr + length);
+}
+
 #define __HAVE_ARCH_PTE_SAME
 static inline int pte_same(pte_t pte_a, pte_t pte_b)
 {
diff --git a/arch/um/include/asm/tlbflush.h b/arch/um/include/asm/tlbflush.h
index d7cf82023b74..62816f6f1c91 100644
--- a/arch/um/include/asm/tlbflush.h
+++ b/arch/um/include/asm/tlbflush.h
@@ -9,24 +9,50 @@
 #include <linux/mm.h>
 
 /*
- * TLB flushing:
+ * In UML, we need to sync the TLB over by using mmap/munmap/mprotect syscalls
+ * from the process handling the MM (which can be the kernel itself).
+ *
+ * To track updates, we can hook into set_ptes and flush_tlb_*. With set_ptes
+ * we catch all PTE transitions where memory that was unusable becomes usable.
+ * While with flush_tlb_* we can track any memory that becomes unusable and
+ * even if a higher layer of the page table was modified.
+ *
+ * So, we simply track updates using both methods and mark the memory area to
+ * be synced later on. The only special case is that flush_tlb_kern_* needs to
+ * be executed immediately as there is no good synchronization point in that
+ * case. In contrast, in the set_ptes case we can wait for the next kernel
+ * segfault before we do the synchronization.
  *
- * - flush_tlb() flushes the current mm struct TLBs
  * - flush_tlb_all() flushes all processes TLBs
  * - flush_tlb_mm(mm) flushes the specified mm context TLB's
  * - flush_tlb_page(vma, vmaddr) flushes one page
- * - flush_tlb_kernel_vm() flushes the kernel vm area
  * - flush_tlb_range(vma, start, end) flushes a range of pages
+ * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
  */
 
+extern int um_tlb_sync(struct mm_struct *mm);
+
 extern void flush_tlb_all(void);
 extern void flush_tlb_mm(struct mm_struct *mm);
-extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
-                            unsigned long end);
-extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long address);
-extern void flush_tlb_kernel_vm(void);
-extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
-extern void __flush_tlb_one(unsigned long addr);
+
+static void flush_tlb_page(struct vm_area_struct *vma, unsigned long address)
+{
+        um_tlb_mark_sync(vma->vm_mm, address, address + PAGE_SIZE);
+}
+
+static void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+                            unsigned long end)
+{
+        um_tlb_mark_sync(vma->vm_mm, start, end);
+}
+
+static void flush_tlb_kernel_range(unsigned long start, unsigned long end)
+{
+        um_tlb_mark_sync(&init_mm, start, end);
+
+        /* Kernel needs to be synced immediately */
+        um_tlb_sync(&init_mm);
+}
 
 void report_enomem(void);
diff --git a/arch/um/include/shared/skas/skas.h b/arch/um/include/shared/skas/skas.h
index 5c78b0cc3dd4..ebaa116de30b 100644
--- a/arch/um/include/shared/skas/skas.h
+++ b/arch/um/include/shared/skas/skas.h
@@ -16,5 +16,6 @@ extern void handle_syscall(struct uml_pt_regs *regs);
 extern long execute_syscall_skas(void *r);
 extern unsigned long current_stub_stack(void);
 extern struct mm_id *current_mm_id(void);
+extern void current_mm_sync(void);
 
 #endif
diff --git a/arch/um/kernel/skas/process.c b/arch/um/kernel/skas/process.c
index c7345c83e07b..26c12db3eca9 100644
--- a/arch/um/kernel/skas/process.c
+++ b/arch/um/kernel/skas/process.c
@@ -8,6 +8,8 @@
 #include <linux/sched/task_stack.h>
 #include <linux/sched/task.h>
 
+#include <asm/tlbflush.h>
+
 #include <as-layout.h>
 #include <kern.h>
 #include <os.h>
@@ -61,3 +63,11 @@ struct mm_id *current_mm_id(void)
 
         return &current->mm->context.id;
 }
+
+void current_mm_sync(void)
+{
+        if (current->mm == NULL)
+                return;
+
+        um_tlb_sync(current->mm);
+}
diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
index c137ff6f84dd..232aa7601d5d 100644
--- a/arch/um/kernel/tlb.c
+++ b/arch/um/kernel/tlb.c
@@ -169,14 +169,16 @@ static inline int update_p4d_range(pgd_t *pgd, unsigned long addr,
         return ret;
 }
 
-static int fix_range_common(struct mm_struct *mm, unsigned long start_addr,
-                            unsigned long end_addr)
+int um_tlb_sync(struct mm_struct *mm)
 {
         pgd_t *pgd;
         struct vm_ops ops;
-        unsigned long addr = start_addr, next;
+        unsigned long addr = mm->context.sync_tlb_range_from, next;
         int ret = 0;
 
+        if (mm->context.sync_tlb_range_to == 0)
+                return 0;
+
         ops.mm_idp = &mm->context.id;
         if (mm == &init_mm) {
                 ops.mmap = kern_map;
@@ -190,7 +192,7 @@ static int fix_range_common(struct mm_struct *mm, unsigned long start_addr,
 
         pgd = pgd_offset(mm, addr);
         do {
-                next = pgd_addr_end(addr, end_addr);
+                next = pgd_addr_end(addr, mm->context.sync_tlb_range_to);
                 if (!pgd_present(*pgd)) {
                         if (pgd_newpage(*pgd)) {
                                 ret = ops.unmap(ops.mm_idp, addr,
@@ -199,87 +201,16 @@ static int fix_range_common(struct mm_struct *mm, unsigned long start_addr,
                                                 next - addr);
                         }
                 } else
                         ret = update_p4d_range(pgd, addr, next, &ops);
-        } while (pgd++, addr = next, ((addr < end_addr) && !ret));
+        } while (pgd++, addr = next,
+                 ((addr < mm->context.sync_tlb_range_to) && !ret));
 
         if (ret == -ENOMEM)
                 report_enomem();
 
-        return ret;
-}
-
-static void flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
-{
-        int err;
-
-        err = fix_range_common(&init_mm, start, end);
-
-        if (err)
-                panic("flush_tlb_kernel failed, errno = %d\n", err);
-}
-
-void flush_tlb_page(struct vm_area_struct *vma, unsigned long address)
-{
-        pgd_t *pgd;
-        p4d_t *p4d;
-        pud_t *pud;
-        pmd_t *pmd;
-        pte_t *pte;
-        struct mm_struct *mm = vma->vm_mm;
-        int r, w, x, prot;
-        struct mm_id *mm_id;
-
-        address &= PAGE_MASK;
-
-        pgd = pgd_offset(mm, address);
-        if (!pgd_present(*pgd))
-                goto kill;
-
-        p4d = p4d_offset(pgd, address);
-        if (!p4d_present(*p4d))
-                goto kill;
-
-        pud = pud_offset(p4d, address);
-        if (!pud_present(*pud))
-                goto kill;
-
-        pmd = pmd_offset(pud, address);
-        if (!pmd_present(*pmd))
-                goto kill;
-
-        pte = pte_offset_kernel(pmd, address);
-
-        r = pte_read(*pte);
-        w = pte_write(*pte);
-        x = pte_exec(*pte);
-        if (!pte_young(*pte)) {
-                r = 0;
-                w = 0;
-        } else if (!pte_dirty(*pte)) {
-                w = 0;
-        }
-
-        mm_id = &mm->context.id;
-        prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) |
-                (x ? UM_PROT_EXEC : 0));
-        if (pte_newpage(*pte)) {
-                if (pte_present(*pte)) {
-                        unsigned long long offset;
-                        int fd;
-
-                        fd = phys_mapping(pte_val(*pte) & PAGE_MASK, &offset);
-                        map(mm_id, address, PAGE_SIZE, prot, fd, offset);
-                } else
-                        unmap(mm_id, address, PAGE_SIZE);
-        } else if (pte_newprot(*pte))
-                protect(mm_id, address, PAGE_SIZE, prot);
-
-        *pte = pte_mkuptodate(*pte);
+        mm->context.sync_tlb_range_from = 0;
+        mm->context.sync_tlb_range_to = 0;
 
-        return;
-
-kill:
-        printk(KERN_ERR "Failed to flush page for address 0x%lx\n", address);
-        force_sig(SIGKILL);
+        return ret;
 }
 
 void flush_tlb_all(void)
@@ -294,54 +225,11 @@
         flush_tlb_mm(current->mm);
 }
 
-void flush_tlb_kernel_range(unsigned long start, unsigned long end)
-{
-        flush_tlb_kernel_range_common(start, end);
-}
-
-void flush_tlb_kernel_vm(void)
-{
-        flush_tlb_kernel_range_common(start_vm, end_vm);
-}
-
-void __flush_tlb_one(unsigned long addr)
-{
-        flush_tlb_kernel_range_common(addr, addr + PAGE_SIZE);
-}
-
-static void fix_range(struct mm_struct *mm, unsigned long start_addr,
-                      unsigned long end_addr)
-{
-        /*
-         * Don't bother flushing if this address space is about to be
-         * destroyed.
-         */
-        if (atomic_read(&mm->mm_users) == 0)
-                return;
-
-        fix_range_common(mm, start_addr, end_addr);
-}
-
-void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
-                     unsigned long end)
-{
-        if (vma->vm_mm == NULL)
-                flush_tlb_kernel_range_common(start, end);
-        else fix_range(vma->vm_mm, start, end);
-}
-EXPORT_SYMBOL(flush_tlb_range);
-
-void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
-                        unsigned long end)
-{
-        fix_range(mm, start, end);
-}
-
 void flush_tlb_mm(struct mm_struct *mm)
 {
         struct vm_area_struct *vma;
         VMA_ITERATOR(vmi, mm, 0);
 
         for_each_vma(vmi, vma)
-                fix_range(mm, vma->vm_start, vma->vm_end);
+                um_tlb_mark_sync(mm, vma->vm_start, vma->vm_end);
 }
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index 6d8ae86ae978..97c8df9c4401 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -113,7 +113,7 @@ int handle_page_fault(unsigned long address, unsigned long ip,
 #if 0
         WARN_ON(!pte_young(*pte) || (is_write && !pte_dirty(*pte)));
 #endif
-        flush_tlb_page(vma, address);
+
 out:
         mmap_read_unlock(mm);
 out_nosemaphore:
@@ -210,8 +210,17 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user,
         if (!is_user && regs)
                 current->thread.segv_regs = container_of(regs, struct pt_regs, regs);
 
-        if (!is_user && (address >= start_vm) && (address < end_vm)) {
-                flush_tlb_kernel_vm();
+        if (!is_user && init_mm.context.sync_tlb_range_to) {
+                /*
+                 * Kernel has pending updates from set_ptes that were not
+                 * flushed yet. Syncing them should fix the pagefault (if not
+                 * we'll get here again and panic).
+                 */
+                err = um_tlb_sync(&init_mm);
+                if (err == -ENOMEM)
+                        report_enomem();
+                if (err)
+                        panic("Failed to sync kernel TLBs: %d", err);
                 goto out;
         }
         else if (current->mm == NULL) {
diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c
index 8dca42627f39..06e23cf870e2 100644
--- a/arch/um/os-Linux/skas/process.c
+++ b/arch/um/os-Linux/skas/process.c
@@ -343,6 +343,8 @@ void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs)
         interrupt_end();
 
         while (1) {
+                current_mm_sync();
+
                 /* Flush out any pending syscalls */
                 err = syscall_stub_flush(current_mm_id());
                 if (err) {
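Taken together, the patch converts UML's eager flushing into a
mark-then-drain protocol: set_ptes() and the flush_tlb_* helpers only
widen one pending interval per mm, and um_tlb_sync() drains it at a safe
point (the next kernel segfault for init_mm, or right before re-entering
userspace for process mms). A toy model of the drain side (apply_range()
is a hypothetical stand-in for the update_p4d_range() page table walk):

/* Toy model of um_tlb_sync(): drain the pending interval, then reset it. */
struct mm_model {
        unsigned long from, to; /* pending [from, to); to == 0 means none */
};

static int apply_range(struct mm_model *mm)
{
        /* the real walk issues mmap/munmap/mprotect for NEW_PAGE entries */
        return 0;
}

static int tlb_sync(struct mm_model *mm)
{
        int ret;

        if (mm->to == 0)        /* fast path: nothing marked since last drain */
                return 0;

        ret = apply_range(mm);

        /* reset unconditionally so the next mark starts a fresh interval */
        mm->from = 0;
        mm->to = 0;
        return ret;
}

int main(void)
{
        struct mm_model mm = { 0x1000, 0x5000 };

        tlb_sync(&mm);          /* drains [0x1000, 0x5000), resets to empty */
        return tlb_sync(&mm);   /* nothing pending: returns 0 immediately */
}

As in the patch's um_tlb_sync(), the model returns early when nothing is
pending, walks the interval once, and resets it unconditionally so the
interval merging in um_tlb_mark_sync() starts over on the next update.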