Message ID | 20190104100517.9809-1-anton.ivanov@cambridgegreys.com |
---|---|
State | Superseded |
Headers | show |
Series | um: Try to avoid kmalloc in signal handling | expand |
Am Freitag, 4. Januar 2019, 11:05:17 CET schrieb anton.ivanov@cambridgegreys.com: > From: Anton Ivanov <anton.ivanov@cambridgegreys.com> > > Signal handling (which maps to interrupt handling in UML) needs > to pass current registers to the relevant handlers and was > allocating a structure for that using kmalloc. It is possible > to avoid this kmalloc by using a small "signal register stack". > A depth of 4 suffices for normal use. If it is exceeded > further sets of registers are allocated as before via kmalloc. > > The end result is >10% performance increase in networking > as measured by iperf and >5% across the board. > > Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com> > --- > arch/um/include/shared/os.h | 4 ++++ > arch/um/kernel/signal.c | 31 +++++++++++++++++++++++++++++++ > arch/um/os-Linux/signal.c | 16 ++++------------ > 3 files changed, 39 insertions(+), 12 deletions(-) > > diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h > index ebf23012a59b..4ec1bc63213b 100644 > --- a/arch/um/include/shared/os.h > +++ b/arch/um/include/shared/os.h > @@ -319,4 +319,8 @@ extern unsigned long os_get_top_address(void); > > long syscall(long number, ...); > > +/* signal.c */ > +extern struct uml_pt_regs *get_save_register_state(void); > +extern void release_save_register_state(struct uml_pt_regs *r); > + > #endif > diff --git a/arch/um/kernel/signal.c b/arch/um/kernel/signal.c > index 57acbd67d85d..558ac7e4df97 100644 > --- a/arch/um/kernel/signal.c > +++ b/arch/um/kernel/signal.c > @@ -6,11 +6,18 @@ > #include <linux/module.h> > #include <linux/ptrace.h> > #include <linux/sched.h> > +#include <linux/slab.h> > #include <asm/siginfo.h> > #include <asm/signal.h> > #include <asm/unistd.h> > #include <frame_kern.h> > #include <kern_util.h> > +#include <os.h> > + > +static atomic_t reg_depth; > + > +static struct uml_pt_regs reg_file[4]; > +#define REG_SWITCH_TO_KMALLOC_DEPTH 3 I fear this is not correct. Think of task switching, if you push something to reg_file[], it is not guaranteed that the same task will run next and pop the item. The order can change. Thanks, //richard
On 1/4/19 10:13 AM, Richard Weinberger wrote: > Am Freitag, 4. Januar 2019, 11:05:17 CET schrieb anton.ivanov@cambridgegreys.com: >> From: Anton Ivanov <anton.ivanov@cambridgegreys.com> >> >> Signal handling (which maps to interrupt handling in UML) needs >> to pass current registers to the relevant handlers and was >> allocating a structure for that using kmalloc. It is possible >> to avoid this kmalloc by using a small "signal register stack". >> A depth of 4 suffices for normal use. If it is exceeded >> further sets of registers are allocated as before via kmalloc. >> >> The end result is >10% performance increase in networking >> as measured by iperf and >5% across the board. >> >> Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com> >> --- >> arch/um/include/shared/os.h | 4 ++++ >> arch/um/kernel/signal.c | 31 +++++++++++++++++++++++++++++++ >> arch/um/os-Linux/signal.c | 16 ++++------------ >> 3 files changed, 39 insertions(+), 12 deletions(-) >> >> diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h >> index ebf23012a59b..4ec1bc63213b 100644 >> --- a/arch/um/include/shared/os.h >> +++ b/arch/um/include/shared/os.h >> @@ -319,4 +319,8 @@ extern unsigned long os_get_top_address(void); >> >> long syscall(long number, ...); >> >> +/* signal.c */ >> +extern struct uml_pt_regs *get_save_register_state(void); >> +extern void release_save_register_state(struct uml_pt_regs *r); >> + >> #endif >> diff --git a/arch/um/kernel/signal.c b/arch/um/kernel/signal.c >> index 57acbd67d85d..558ac7e4df97 100644 >> --- a/arch/um/kernel/signal.c >> +++ b/arch/um/kernel/signal.c >> @@ -6,11 +6,18 @@ >> #include <linux/module.h> >> #include <linux/ptrace.h> >> #include <linux/sched.h> >> +#include <linux/slab.h> >> #include <asm/siginfo.h> >> #include <asm/signal.h> >> #include <asm/unistd.h> >> #include <frame_kern.h> >> #include <kern_util.h> >> +#include <os.h> >> + >> +static atomic_t reg_depth; >> + >> +static struct uml_pt_regs reg_file[4]; >> +#define REG_SWITCH_TO_KMALLOC_DEPTH 3 > > I fear this is not correct. > Think of task switching, if you push something to reg_file[], > it is not guaranteed that the same task will run next and pop the item. > The order can change. It should not occur for the signals that map onto IRQ (timer and IO) - there it is all kernel. I cannot trigger that for the other ones, but it may be possible. IMO we should just bite the bullet and go back to using the stack increasing the minimum stack order needed to 1 for 32 bit and to 2 for 64 bit. x86 if I read it correctly is still using the stack even for the new register sets. > > Thanks, > //richard > > > > >
Am Freitag, 4. Januar 2019, 11:19:46 CET schrieb Anton Ivanov: > > I fear this is not correct. > > Think of task switching, if you push something to reg_file[], > > it is not guaranteed that the same task will run next and pop the item. > > The order can change. > > It should not occur for the signals that map onto IRQ (timer and IO) - > there it is all kernel. > > I cannot trigger that for the other ones, but it may be possible. We need to be super sure about this. > IMO we should just bite the bullet and go back to using the stack > increasing the minimum stack order needed to 1 for 32 bit and to 2 for > 64 bit. Increasing the kernel stack just for this is also a huge overhead. But I never benchmarked it myself. > x86 if I read it correctly is still using the stack even for the new > register sets. Are you sure? I don't think so. The kernel stack is small and the xregset is huge. Where did you see this? Thanks, //richard
On 1/4/19 10:33 AM, Richard Weinberger wrote: > Am Freitag, 4. Januar 2019, 11:19:46 CET schrieb Anton Ivanov: >>> I fear this is not correct. >>> Think of task switching, if you push something to reg_file[], >>> it is not guaranteed that the same task will run next and pop the item. >>> The order can change. >> >> It should not occur for the signals that map onto IRQ (timer and IO) - >> there it is all kernel. >> >> I cannot trigger that for the other ones, but it may be possible. > > We need to be super sure about this. I am pretty sure about the IRQ ones. That call chain goes cleanly down and up resulting in whatever was pushed to be popped. I am trying to get my head around what can possibly happen in the various fault ones that will result in a task switch to a task which is half-done with handling signal and is about to return (and pop the wrong one). > >> IMO we should just bite the bullet and go back to using the stack >> increasing the minimum stack order needed to 1 for 32 bit and to 2 for >> 64 bit. > > Increasing the kernel stack just for this is also a huge overhead. > But I never benchmarked it myself. > >> x86 if I read it correctly is still using the stack even for the new >> register sets. > > Are you sure? I don't think so. The kernel stack is small and the xregset is > huge. > Where did you see this? It looked like that when I skimmed across get_sigframe and fpu_alloc_mathframe in x86 signal.c in first read, but I may be wrong. > > Thanks, > //richard > > >
On 1/4/19 10:33 AM, Richard Weinberger wrote: > Am Freitag, 4. Januar 2019, 11:19:46 CET schrieb Anton Ivanov: >>> I fear this is not correct. >>> Think of task switching, if you push something to reg_file[], >>> it is not guaranteed that the same task will run next and pop the item. >>> The order can change. >> >> It should not occur for the signals that map onto IRQ (timer and IO) - >> there it is all kernel. >> >> I cannot trigger that for the other ones, but it may be possible. > > We need to be super sure about this. > >> IMO we should just bite the bullet and go back to using the stack >> increasing the minimum stack order needed to 1 for 32 bit and to 2 for >> 64 bit. > > Increasing the kernel stack just for this is also a huge overhead. > But I never benchmarked it myself. I just did. I wrote a quick alternative to put it back on the stack. Stack is marginally ~ 1% or thereabouts faster than the data segment patch on network tests. That makes it 14% faster than kmalloc-ing the pt_regs in the signal handling path. The difference for other stuff (disk, etc) is within statistical error. It is indeed using more stack - the minimum stack order needed for my setup on 64 bit is now 2 (On AMD A12). > >> x86 if I read it correctly is still using the stack even for the new >> register sets. > > Are you sure? I don't think so. The kernel stack is small and the xregset is > huge. > Where did you see this? > > Thanks, > //richard > > >
diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h index ebf23012a59b..4ec1bc63213b 100644 --- a/arch/um/include/shared/os.h +++ b/arch/um/include/shared/os.h @@ -319,4 +319,8 @@ extern unsigned long os_get_top_address(void); long syscall(long number, ...); +/* signal.c */ +extern struct uml_pt_regs *get_save_register_state(void); +extern void release_save_register_state(struct uml_pt_regs *r); + #endif diff --git a/arch/um/kernel/signal.c b/arch/um/kernel/signal.c index 57acbd67d85d..558ac7e4df97 100644 --- a/arch/um/kernel/signal.c +++ b/arch/um/kernel/signal.c @@ -6,11 +6,18 @@ #include <linux/module.h> #include <linux/ptrace.h> #include <linux/sched.h> +#include <linux/slab.h> #include <asm/siginfo.h> #include <asm/signal.h> #include <asm/unistd.h> #include <frame_kern.h> #include <kern_util.h> +#include <os.h> + +static atomic_t reg_depth; + +static struct uml_pt_regs reg_file[4]; +#define REG_SWITCH_TO_KMALLOC_DEPTH 3 EXPORT_SYMBOL(block_signals); EXPORT_SYMBOL(unblock_signals); @@ -111,3 +118,27 @@ void do_signal(struct pt_regs *regs) if (!handled_sig) restore_saved_sigmask(); } + +struct uml_pt_regs *get_save_register_state(void) +{ + struct uml_pt_regs *r; + int depth = atomic_inc_return(®_depth); + + if (depth > REG_SWITCH_TO_KMALLOC_DEPTH) { + WARN(1, "Exceeded uml save register space, should not occur"); + r = kmalloc(sizeof(struct uml_pt_regs), GFP_ATOMIC); + if (!r) + panic("Cannot allocate extra save register space"); + } else { + r = ®_file[depth - 1]; + } + return r; +} + +void release_save_register_state(struct uml_pt_regs *r) +{ + if (atomic_dec_return(®_depth) > REG_SWITCH_TO_KMALLOC_DEPTH) + kfree(r); +} + + diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c index bf0acb8aad8b..81765abe1c15 100644 --- a/arch/um/os-Linux/signal.c +++ b/arch/um/os-Linux/signal.c @@ -31,13 +31,9 @@ void (*sig_info[NSIG])(int, struct siginfo *, struct uml_pt_regs *) = { static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc) { - struct uml_pt_regs *r; + struct uml_pt_regs *r = get_save_register_state(); int save_errno = errno; - r = uml_kmalloc(sizeof(struct uml_pt_regs), UM_GFP_ATOMIC); - if (!r) - panic("out of memory"); - r->is_user = 0; if (sig == SIGSEGV) { /* For segfaults, we want the data from the sigcontext. */ @@ -53,7 +49,7 @@ static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc) errno = save_errno; - free(r); + release_save_register_state(r); } /* @@ -91,17 +87,13 @@ void sig_handler(int sig, struct siginfo *si, mcontext_t *mc) static void timer_real_alarm_handler(mcontext_t *mc) { - struct uml_pt_regs *regs; - - regs = uml_kmalloc(sizeof(struct uml_pt_regs), UM_GFP_ATOMIC); - if (!regs) - panic("out of memory"); + struct uml_pt_regs *regs = get_save_register_state(); if (mc != NULL) get_regs_from_mc(regs, mc); timer_handler(SIGALRM, NULL, regs); - free(regs); + release_save_register_state(regs); } void timer_alarm_handler(int sig, struct siginfo *unused_si, mcontext_t *mc)