Message ID | a00a498ea9803ac3c6f2b933203a2675d1785208.1731290567.git.thehajime@gmail.com |
---|---|
State | RFC |
Headers | show |
Series | nommu UML | expand |
Hi, On Mon, 2024-11-11 at 15:27 +0900, Hajime Tazaki wrote: > On !MMU mode, the address of vdso is accessible from userspace. This > commit implements the entry point by pointing a block of page address. > > This commit also add memory permission configuration of vdso page to be > executable. > > Signed-off-by: Hajime Tazaki <thehajime@gmail.com> > Signed-off-by: Ricardo Koller <ricarkol@google.com> > --- > arch/x86/um/vdso/um_vdso.c | 20 ++++++++++++++++++++ > arch/x86/um/vdso/vma.c | 14 ++++++++++++++ > 2 files changed, 34 insertions(+) > > diff --git a/arch/x86/um/vdso/um_vdso.c b/arch/x86/um/vdso/um_vdso.c > index cbae2584124f..eff3e6641a0e 100644 > --- a/arch/x86/um/vdso/um_vdso.c > +++ b/arch/x86/um/vdso/um_vdso.c > @@ -23,10 +23,17 @@ int __vdso_clock_gettime(clockid_t clock, struct > __kernel_old_timespec *ts) > { > long ret; > > +#ifdef CONFIG_MMU > asm("syscall" > : "=a" (ret) > : "0" (__NR_clock_gettime), "D" (clock), "S" (ts) > : "rcx", "r11", "memory"); > +#else > + asm("call *%1" > + : "=a" (ret) > + : "0" ((unsigned long)__NR_clock_gettime), "D" > (clock), "S" (ts) > + : "rcx", "r11", "memory"); > +#endif > > return ret; > } > @@ -37,10 +44,17 @@ int __vdso_gettimeofday(struct > __kernel_old_timeval *tv, struct timezone *tz) > { > long ret; > > +#ifdef CONFIG_MMU > asm("syscall" > : "=a" (ret) > : "0" (__NR_gettimeofday), "D" (tv), "S" (tz) > : "rcx", "r11", "memory"); > +#else > + asm("call *%1" > + : "=a" (ret) > + : "0" ((unsigned long)__NR_gettimeofday), "D" (tv), > "S" (tz) > + : "rcx", "r11", "memory"); > +#endif > > return ret; > } > @@ -51,9 +65,15 @@ __kernel_old_time_t > __vdso_time(__kernel_old_time_t *t) > { > long secs; > > +#ifdef CONFIG_MMU > asm volatile("syscall" > : "=a" (secs) > : "0" (__NR_time), "D" (t) : "cc", "r11", "cx", > "memory"); > +#else > + asm("call *%1" > + : "=a" (secs) > + : "0" ((unsigned long)__NR_time), "D" (t) : "cc", > "r11", "cx", "memory"); > +#endif Maybe introduce a macro for "syscall" vs. "call *%1"? 
The parameters should be identical in both cases. The "call" could probably even jump to the end of the NOP ramp directly in this case. Though maybe I am missing something with the "(unsigned long)" cast? > return secs; > } > diff --git a/arch/x86/um/vdso/vma.c b/arch/x86/um/vdso/vma.c > index f238f7b33cdd..83c861e2a815 100644 > --- a/arch/x86/um/vdso/vma.c > +++ b/arch/x86/um/vdso/vma.c > @@ -9,6 +9,7 @@ > #include <asm/page.h> > #include <asm/elf.h> > #include <linux/init.h> > +#include <os.h> > > static unsigned int __read_mostly vdso_enabled = 1; > unsigned long um_vdso_addr; > @@ -24,7 +25,9 @@ static int __init init_vdso(void) > > BUG_ON(vdso_end - vdso_start > PAGE_SIZE); > > +#ifdef CONFIG_MMU > um_vdso_addr = task_size - PAGE_SIZE; > +#endif > > vdsop = kmalloc(sizeof(struct page *), GFP_KERNEL); > if (!vdsop) > @@ -40,6 +43,15 @@ static int __init init_vdso(void) > copy_page(page_address(um_vdso), vdso_start); > *vdsop = um_vdso; > > +#ifndef CONFIG_MMU > + /* this is fine with NOMMU as everything is accessible */ > + um_vdso_addr = (unsigned long)page_address(um_vdso); > + os_protect_memory((void *)um_vdso_addr, vdso_end - > vdso_start, 1, 1, 1); I think this should be "1, 0, 1", i.e. we shouldn't enable write access. > + pr_debug("vdso_start=%lx um_vdso_addr=%lx pg_um_vdso=%lx", > + (unsigned long)vdso_start, um_vdso_addr, > + (unsigned long)page_address(um_vdso)); > +#endif > + > return 0; > > oom: > @@ -50,6 +62,7 @@ static int __init init_vdso(void) > } > subsys_initcall(init_vdso); > > +#ifdef CONFIG_MMU > int arch_setup_additional_pages(struct linux_binprm *bprm, int > uses_interp) > { > struct vm_area_struct *vma; > @@ -74,3 +87,4 @@ int arch_setup_additional_pages(struct linux_binprm > *bprm, int uses_interp) > > return IS_ERR(vma) ? PTR_ERR(vma) : 0; > } > +#endif
Thanks Benjamin, On Wed, 27 Nov 2024 19:36:44 +0900, Benjamin Berg wrote: > > @@ -51,9 +65,15 @@ __kernel_old_time_t > > __vdso_time(__kernel_old_time_t *t) > > { > > long secs; > > > > +#ifdef CONFIG_MMU > > asm volatile("syscall" > > : "=a" (secs) > > : "0" (__NR_time), "D" (t) : "cc", "r11", "cx", > > "memory"); > > +#else > > + asm("call *%1" > > + : "=a" (secs) > > + : "0" ((unsigned long)__NR_time), "D" (t) : "cc", > > "r11", "cx", "memory"); > > +#endif > > Maybe introduce a macro for "syscall" vs. "call *%1"? The parameters > should be identical in both cases. I think it's nice to clean up this part. > The "call" could probably even jump > to the end of the NOP ramp directly in this case. Ah, that would be a bit of a speedup. I confirmed that it works. I'll update it if the code readability doesn't become worse. > Though maybe I am missing something with the "(unsigned long)" cast? Without it, gcc says CC arch/x86/um/vdso/um_vdso.o ../arch/x86/um/vdso/um_vdso.c: Assembler messages: ../arch/x86/um/vdso/um_vdso.c:50: Error: operand size mismatch for `call' so the cast was intended to silence that error. But if I change the constraint as below, the error is gone. - : "0" ((unsigned long)__NR_time), "D" (t) : "cc", + : "a" (__NR_time), "D" (t) : "cc", I will clean it up as well. 
> > return secs; > > } > > diff --git a/arch/x86/um/vdso/vma.c b/arch/x86/um/vdso/vma.c > > index f238f7b33cdd..83c861e2a815 100644 > > --- a/arch/x86/um/vdso/vma.c > > +++ b/arch/x86/um/vdso/vma.c > > @@ -9,6 +9,7 @@ > > #include <asm/page.h> > > #include <asm/elf.h> > > #include <linux/init.h> > > +#include <os.h> > > > > static unsigned int __read_mostly vdso_enabled = 1; > > unsigned long um_vdso_addr; > > @@ -24,7 +25,9 @@ static int __init init_vdso(void) > > > > BUG_ON(vdso_end - vdso_start > PAGE_SIZE); > > > > +#ifdef CONFIG_MMU > > um_vdso_addr = task_size - PAGE_SIZE; > > +#endif > > > > vdsop = kmalloc(sizeof(struct page *), GFP_KERNEL); > > if (!vdsop) > > @@ -40,6 +43,15 @@ static int __init init_vdso(void) > > copy_page(page_address(um_vdso), vdso_start); > > *vdsop = um_vdso; > > > > +#ifndef CONFIG_MMU > > + /* this is fine with NOMMU as everything is accessible */ > > + um_vdso_addr = (unsigned long)page_address(um_vdso); > > + os_protect_memory((void *)um_vdso_addr, vdso_end - > > vdso_start, 1, 1, 1); > > I think this should be "1, 0, 1", i.e. we shouldn't enable write > access. Not sure if this is related, but with PROT_WRITE off I cannot set a breakpoint with gdb in the vdso area. Normal execution (without gdb) looks fine. -- Hajime
diff --git a/arch/x86/um/vdso/um_vdso.c b/arch/x86/um/vdso/um_vdso.c index cbae2584124f..eff3e6641a0e 100644 --- a/arch/x86/um/vdso/um_vdso.c +++ b/arch/x86/um/vdso/um_vdso.c @@ -23,10 +23,17 @@ int __vdso_clock_gettime(clockid_t clock, struct __kernel_old_timespec *ts) { long ret; +#ifdef CONFIG_MMU asm("syscall" : "=a" (ret) : "0" (__NR_clock_gettime), "D" (clock), "S" (ts) : "rcx", "r11", "memory"); +#else + asm("call *%1" + : "=a" (ret) + : "0" ((unsigned long)__NR_clock_gettime), "D" (clock), "S" (ts) + : "rcx", "r11", "memory"); +#endif return ret; } @@ -37,10 +44,17 @@ int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz) { long ret; +#ifdef CONFIG_MMU asm("syscall" : "=a" (ret) : "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "rcx", "r11", "memory"); +#else + asm("call *%1" + : "=a" (ret) + : "0" ((unsigned long)__NR_gettimeofday), "D" (tv), "S" (tz) + : "rcx", "r11", "memory"); +#endif return ret; } @@ -51,9 +65,15 @@ __kernel_old_time_t __vdso_time(__kernel_old_time_t *t) { long secs; +#ifdef CONFIG_MMU asm volatile("syscall" : "=a" (secs) : "0" (__NR_time), "D" (t) : "cc", "r11", "cx", "memory"); +#else + asm("call *%1" + : "=a" (secs) + : "0" ((unsigned long)__NR_time), "D" (t) : "cc", "r11", "cx", "memory"); +#endif return secs; } diff --git a/arch/x86/um/vdso/vma.c b/arch/x86/um/vdso/vma.c index f238f7b33cdd..83c861e2a815 100644 --- a/arch/x86/um/vdso/vma.c +++ b/arch/x86/um/vdso/vma.c @@ -9,6 +9,7 @@ #include <asm/page.h> #include <asm/elf.h> #include <linux/init.h> +#include <os.h> static unsigned int __read_mostly vdso_enabled = 1; unsigned long um_vdso_addr; @@ -24,7 +25,9 @@ static int __init init_vdso(void) BUG_ON(vdso_end - vdso_start > PAGE_SIZE); +#ifdef CONFIG_MMU um_vdso_addr = task_size - PAGE_SIZE; +#endif vdsop = kmalloc(sizeof(struct page *), GFP_KERNEL); if (!vdsop) @@ -40,6 +43,15 @@ static int __init init_vdso(void) copy_page(page_address(um_vdso), vdso_start); *vdsop = um_vdso; +#ifndef CONFIG_MMU + /* this is 
fine with NOMMU as everything is accessible */ + um_vdso_addr = (unsigned long)page_address(um_vdso); + os_protect_memory((void *)um_vdso_addr, vdso_end - vdso_start, 1, 1, 1); + pr_debug("vdso_start=%lx um_vdso_addr=%lx pg_um_vdso=%lx", + (unsigned long)vdso_start, um_vdso_addr, + (unsigned long)page_address(um_vdso)); +#endif + return 0; oom: @@ -50,6 +62,7 @@ static int __init init_vdso(void) } subsys_initcall(init_vdso); +#ifdef CONFIG_MMU int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) { struct vm_area_struct *vma; @@ -74,3 +87,4 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) return IS_ERR(vma) ? PTR_ERR(vma) : 0; } +#endif