Message ID | 1579681061.26081.48.camel@hbabu-laptop (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | powerpc/vas: Page fault handling for user space NX requests | expand |
Context | Check | Description |
---|---|---|
snowpatch_ozlabs/apply_patch | success | Successfully applied on branch powerpc/merge (20862247a368dbb75d6e97d82345999adaacf3cc) |
snowpatch_ozlabs/checkpatch | warning | total: 0 errors, 0 warnings, 3 checks, 134 lines checked |
snowpatch_ozlabs/needsstable | success | Patch has no Fixes tags |
On Wed, 2020-01-22 at 00:17 -0800, Haren Myneni wrote: > For each fault CRB, update fault address in CRB (fault_storage_addr) > and translation error status in CSB so that user space can touch the > fault address and resend the request. If the user space passed invalid > CSB address send signal to process with SIGSEGV. > > Signed-off-by: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com> > Signed-off-by: Haren Myneni <haren@linux.ibm.com> > --- > arch/powerpc/platforms/powernv/vas-fault.c | 116 > +++++++++++++++++++++++++++++ > 1 file changed, 116 insertions(+) > > diff --git a/arch/powerpc/platforms/powernv/vas-fault.c > b/arch/powerpc/platforms/powernv/vas-fault.c > index 5c2cada..2cfab0c 100644 > --- a/arch/powerpc/platforms/powernv/vas-fault.c > +++ b/arch/powerpc/platforms/powernv/vas-fault.c > @@ -11,6 +11,7 @@ > #include <linux/slab.h> > #include <linux/uaccess.h> > #include <linux/kthread.h> > +#include <linux/sched/signal.h> > #include <linux/mmu_context.h> > #include <asm/icswx.h> > > @@ -26,6 +27,120 @@ > #define VAS_FAULT_WIN_FIFO_SIZE (4 << 20) > > /* > + * Update the CSB to indicate a translation error. > + * > + * If the fault is in the CSB address itself or if we are unable to > + * update the CSB, send a signal to the process, because we have no > + * other way of notifying the user process. > + * > + * Remaining settings in the CSB are based on wait_for_csb() of > + * NX-GZIP. > + */ > +static void update_csb(struct vas_window *window, > + struct coprocessor_request_block *crb) > +{ > + int rc; > + struct pid *pid; > + void __user *csb_addr; > + struct task_struct *tsk; > + struct kernel_siginfo info; > + struct coprocessor_status_block csb; > + > + /* > + * NX user space windows can not be opened for task->mm=NULL > + * and faults will not be generated for kernel requests. > + */ > + if (!window->mm || !window->user_win) > + return; > + > + csb_addr = (void *)be64_to_cpu(crb->csb_addr); > + > + csb.cc = CSB_CC_TRANSLATION; > + csb.ce = CSB_CE_TERMINATION; > + csb.cs = 0; > + csb.count = 0; > + > + /* > + * Returns the fault address in CPU format since it is passed with > + * signal. But if the user space expects BE format, need changes. > + * i.e either kernel (here) or user should convert to CPU format. > + * Not both! > + */ > + csb.address = be64_to_cpu(crb->stamp.nx.fault_storage_addr); This looks wrong and I don't understand the comment. You need to convert this back to be64 to write it to csb.address. ie. csb.address = cpu_to_be64(be64_to_cpu(crb->stamp.nx.fault_storage_addr)); Which I think you can just avoid the endian conversion all together. > + csb.flags = 0; > + > + pid = window->pid; > + tsk = get_pid_task(pid, PIDTYPE_PID); > + /* > + * Send window will be closed after processing all NX requests > + * and process exits after closing all windows. In multi-thread > + * applications, thread may not exists, but does not close FD > + * (means send window) upon exit. Parent thread (tgid) can use > + * and close the window later. > + * pid and mm references are taken when window is opened by > + * process (pid). So tgid is used only when child thread opens > + * a window and exits without closing it in multithread tasks. > + */ > + if (!tsk) { > + pid = window->tgid; > + tsk = get_pid_task(pid, PIDTYPE_PID); > + /* > + * Parent thread will be closing window during its exit. > + * So should not get here. > + */ > + if (!tsk) > + return; > + } > + > + /* Return if the task is exiting. */ > + if (tsk->flags & PF_EXITING) { > + put_task_struct(tsk); > + return; > + } > + > + use_mm(window->mm); > + rc = copy_to_user(csb_addr, &csb, sizeof(csb)); > + /* > + * User space polls on csb.flags (first byte). So add barrier > + * then copy first byte with csb flags update. > + */ > + smp_mb(); > + if (!rc) { > + csb.flags = CSB_V; > + rc = copy_to_user(csb_addr, &csb, sizeof(u8)); > + } > + unuse_mm(window->mm); > + put_task_struct(tsk); > + > + /* Success */ > + if (!rc) > + return; > + > + pr_err("Invalid CSB address 0x%p signalling pid(%d)\n", > + csb_addr, pid_vnr(pid)); This is a userspace error, not a kernel error. This should not be a pr_err(). Userspace could spam the console with this. > + > + clear_siginfo(&info); > + info.si_signo = SIGSEGV; > + info.si_errno = EFAULT; > + info.si_code = SEGV_MAPERR; > + info.si_addr = csb_addr; > + > + /* > + * process will be polling on csb.flags after request is sent to > + * NX. So generally CSB update should not fail except when an > + * application does not follow the process properly. So an error > + * message will be displayed and leave it to user space whether > + * to ignore or handle this signal. > + */ > + rcu_read_lock(); > + rc = kill_pid_info(SIGSEGV, &info, pid); > + rcu_read_unlock(); why the rcu_read_un/lock() here? > + > + pr_devel("%s(): pid %d kill_proc_info() rc %d\n", __func__, > + pid_vnr(pid), rc); > +} > + > +/* > * Process CRBs that we receive on the fault window. > */ > irqreturn_t vas_fault_handler(int irq, void *data) > @@ -104,6 +219,7 @@ irqreturn_t vas_fault_handler(int irq, void *data) > return IRQ_HANDLED; > } > > + update_csb(window, crb); > } while (true); > > return IRQ_HANDLED;
Mikey, Thanks for your review comments. On Fri, 2020-02-07 at 16:46 +1100, Michael Neuling wrote: > On Wed, 2020-01-22 at 00:17 -0800, Haren Myneni wrote: > > For each fault CRB, update fault address in CRB (fault_storage_addr) > > and translation error status in CSB so that user space can touch the > > fault address and resend the request. If the user space passed invalid > > CSB address send signal to process with SIGSEGV. > > > > Signed-off-by: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com> > > Signed-off-by: Haren Myneni <haren@linux.ibm.com> > > --- > > arch/powerpc/platforms/powernv/vas-fault.c | 116 > > +++++++++++++++++++++++++++++ > > 1 file changed, 116 insertions(+) > > > > diff --git a/arch/powerpc/platforms/powernv/vas-fault.c > > b/arch/powerpc/platforms/powernv/vas-fault.c > > index 5c2cada..2cfab0c 100644 > > --- a/arch/powerpc/platforms/powernv/vas-fault.c > > +++ b/arch/powerpc/platforms/powernv/vas-fault.c > > @@ -11,6 +11,7 @@ > > #include <linux/slab.h> > > #include <linux/uaccess.h> > > #include <linux/kthread.h> > > +#include <linux/sched/signal.h> > > #include <linux/mmu_context.h> > > #include <asm/icswx.h> > > > > @@ -26,6 +27,120 @@ > > #define VAS_FAULT_WIN_FIFO_SIZE (4 << 20) > > > > /* > > + * Update the CSB to indicate a translation error. > > + * > > + * If the fault is in the CSB address itself or if we are unable to > > + * update the CSB, send a signal to the process, because we have no > > + * other way of notifying the user process. > > + * > > + * Remaining settings in the CSB are based on wait_for_csb() of > > + * NX-GZIP. > > + */ > > +static void update_csb(struct vas_window *window, > > + struct coprocessor_request_block *crb) > > +{ > > + int rc; > > + struct pid *pid; > > + void __user *csb_addr; > > + struct task_struct *tsk; > > + struct kernel_siginfo info; > > + struct coprocessor_status_block csb; > > + > > + /* > > + * NX user space windows can not be opened for task->mm=NULL > > + * and faults will not be generated for kernel requests. > > + */ > > + if (!window->mm || !window->user_win) > > + return; > > + > > + csb_addr = (void *)be64_to_cpu(crb->csb_addr); > > + > > + csb.cc = CSB_CC_TRANSLATION; > > + csb.ce = CSB_CE_TERMINATION; > > + csb.cs = 0; > > + csb.count = 0; > > + > > + /* > > + * Returns the fault address in CPU format since it is passed with > > + * signal. But if the user space expects BE format, need changes. > > + * i.e either kernel (here) or user should convert to CPU format. > > + * Not both! > > + */ > > + csb.address = be64_to_cpu(crb->stamp.nx.fault_storage_addr); > > This looks wrong and I don't understand the comment. You need to convert this > back to be64 to write it to csb.address. ie. > > csb.address = cpu_to_be64(be64_to_cpu(crb->stamp.nx.fault_storage_addr)); > > Which I think you can just avoid the endian conversion all together. NX pastes fault CRB in big-endian, so passing this address in CPU format to user space, otherwise the library has to convert. What is the standard way for passing to user space? > > > + csb.flags = 0; > > + > > + pid = window->pid; > > + tsk = get_pid_task(pid, PIDTYPE_PID); > > + /* > > + * Send window will be closed after processing all NX requests > > + * and process exits after closing all windows. In multi-thread > > + * applications, thread may not exists, but does not close FD > > + * (means send window) upon exit. Parent thread (tgid) can use > > + * and close the window later. > > + * pid and mm references are taken when window is opened by > > + * process (pid). So tgid is used only when child thread opens > > + * a window and exits without closing it in multithread tasks. > > + */ > > + if (!tsk) { > > + pid = window->tgid; > > + tsk = get_pid_task(pid, PIDTYPE_PID); > > + /* > > + * Parent thread will be closing window during its exit. > > + * So should not get here. > > + */ > > + if (!tsk) > > + return; > > + } > > + > > + /* Return if the task is exiting. */ > > + if (tsk->flags & PF_EXITING) { > > + put_task_struct(tsk); > > + return; > > + } > > + > > + use_mm(window->mm); > > + rc = copy_to_user(csb_addr, &csb, sizeof(csb)); > > + /* > > + * User space polls on csb.flags (first byte). So add barrier > > + * then copy first byte with csb flags update. > > + */ > > + smp_mb(); > > + if (!rc) { > > + csb.flags = CSB_V; > > + rc = copy_to_user(csb_addr, &csb, sizeof(u8)); > > + } > > + unuse_mm(window->mm); > > + put_task_struct(tsk); > > + > > + /* Success */ > > + if (!rc) > > + return; > > + > > + pr_err("Invalid CSB address 0x%p signalling pid(%d)\n", > > + csb_addr, pid_vnr(pid)); > > This is a userspace error, not a kernel error. This should not be a pr_err(). > > Userspace could spam the console with this. Will change it to pr_debug/info. Added pr_err() during development and missed to remove. > > > + > > + clear_siginfo(&info); > > + info.si_signo = SIGSEGV; > > + info.si_errno = EFAULT; > > + info.si_code = SEGV_MAPERR; > > + info.si_addr = csb_addr; > > + > > + /* > > + * process will be polling on csb.flags after request is sent to > > + * NX. So generally CSB update should not fail except when an > > + * application does not follow the process properly. So an error > > + * message will be displayed and leave it to user space whether > > + * to ignore or handle this signal. > > + */ > > + rcu_read_lock(); > > + rc = kill_pid_info(SIGSEGV, &info, pid); > > + rcu_read_unlock(); > > why the rcu_read_un/lock() here? Used same as in kill_proc_info()/kill_something_info() > > > + > > + pr_devel("%s(): pid %d kill_proc_info() rc %d\n", __func__, > > + pid_vnr(pid), rc); > > +} > > + > > +/* > > * Process CRBs that we receive on the fault window. > > */ > > irqreturn_t vas_fault_handler(int irq, void *data) > > @@ -104,6 +219,7 @@ irqreturn_t vas_fault_handler(int irq, void *data) > > return IRQ_HANDLED; > > } > > > > + update_csb(window, crb); > > } while (true); > > > > return IRQ_HANDLED; >
> > > + > > > + csb.cc = CSB_CC_TRANSLATION; > > > + csb.ce = CSB_CE_TERMINATION; > > > + csb.cs = 0; > > > + csb.count = 0; > > > + > > > + /* > > > + * Returns the fault address in CPU format since it is passed with > > > + * signal. But if the user space expects BE format, need changes. > > > + * i.e either kernel (here) or user should convert to CPU format. > > > + * Not both! > > > + */ > > > + csb.address = be64_to_cpu(crb->stamp.nx.fault_storage_addr); > > > > This looks wrong and I don't understand the comment. You need to convert > > this > > back to be64 to write it to csb.address. ie. > > > > csb.address = cpu_to_be64(be64_to_cpu(crb->stamp.nx.fault_storage_addr)); > > > > Which I think you can just avoid the endian conversion all together. > > NX pastes fault CRB in big-endian, so passing this address in CPU format > to user space, otherwise the library has to convert. OK, then please change the definition in struct coprocessor_status_block to just __u64. struct coprocessor_status_block { u8 flags; u8 cs; u8 cc; u8 ce; __be32 count; __be64 address; } __packed __aligned(CSB_ALIGN); Big but.... I thought "struct coprocessor_status_block" was also written by hardware. If that's the case then it needs to be __be64 and you need the kernel to synthesize exactly what the hardware is doing. Hence the struct definition is correct and the kernel needs to convert to _be64 on writing. > What is the standard way for passing to user space? CPU endian. > > > + * process will be polling on csb.flags after request is sent to > > > + * NX. So generally CSB update should not fail except when an > > > + * application does not follow the process properly. So an error > > > + * message will be displayed and leave it to user space whether > > > + * to ignore or handle this signal. > > > + */ > > > + rcu_read_lock(); > > > + rc = kill_pid_info(SIGSEGV, &info, pid); > > > + rcu_read_unlock(); > > > > why the rcu_read_un/lock() here? > > Used same as in kill_proc_info()/kill_something_info() Please document. Mikey
diff --git a/arch/powerpc/platforms/powernv/vas-fault.c b/arch/powerpc/platforms/powernv/vas-fault.c index 5c2cada..2cfab0c 100644 --- a/arch/powerpc/platforms/powernv/vas-fault.c +++ b/arch/powerpc/platforms/powernv/vas-fault.c @@ -11,6 +11,7 @@ #include <linux/slab.h> #include <linux/uaccess.h> #include <linux/kthread.h> +#include <linux/sched/signal.h> #include <linux/mmu_context.h> #include <asm/icswx.h> @@ -26,6 +27,120 @@ #define VAS_FAULT_WIN_FIFO_SIZE (4 << 20) /* + * Update the CSB to indicate a translation error. + * + * If the fault is in the CSB address itself or if we are unable to + * update the CSB, send a signal to the process, because we have no + * other way of notifying the user process. + * + * Remaining settings in the CSB are based on wait_for_csb() of + * NX-GZIP. + */ +static void update_csb(struct vas_window *window, + struct coprocessor_request_block *crb) +{ + int rc; + struct pid *pid; + void __user *csb_addr; + struct task_struct *tsk; + struct kernel_siginfo info; + struct coprocessor_status_block csb; + + /* + * NX user space windows can not be opened for task->mm=NULL + * and faults will not be generated for kernel requests. + */ + if (!window->mm || !window->user_win) + return; + + csb_addr = (void *)be64_to_cpu(crb->csb_addr); + + csb.cc = CSB_CC_TRANSLATION; + csb.ce = CSB_CE_TERMINATION; + csb.cs = 0; + csb.count = 0; + + /* + * Returns the fault address in CPU format since it is passed with + * signal. But if the user space expects BE format, need changes. + * i.e either kernel (here) or user should convert to CPU format. + * Not both! + */ + csb.address = be64_to_cpu(crb->stamp.nx.fault_storage_addr); + csb.flags = 0; + + pid = window->pid; + tsk = get_pid_task(pid, PIDTYPE_PID); + /* + * Send window will be closed after processing all NX requests + * and process exits after closing all windows. In multi-thread + * applications, thread may not exists, but does not close FD + * (means send window) upon exit. Parent thread (tgid) can use + * and close the window later. + * pid and mm references are taken when window is opened by + * process (pid). So tgid is used only when child thread opens + * a window and exits without closing it in multithread tasks. + */ + if (!tsk) { + pid = window->tgid; + tsk = get_pid_task(pid, PIDTYPE_PID); + /* + * Parent thread will be closing window during its exit. + * So should not get here. + */ + if (!tsk) + return; + } + + /* Return if the task is exiting. */ + if (tsk->flags & PF_EXITING) { + put_task_struct(tsk); + return; + } + + use_mm(window->mm); + rc = copy_to_user(csb_addr, &csb, sizeof(csb)); + /* + * User space polls on csb.flags (first byte). So add barrier + * then copy first byte with csb flags update. + */ + smp_mb(); + if (!rc) { + csb.flags = CSB_V; + rc = copy_to_user(csb_addr, &csb, sizeof(u8)); + } + unuse_mm(window->mm); + put_task_struct(tsk); + + /* Success */ + if (!rc) + return; + + pr_err("Invalid CSB address 0x%p signalling pid(%d)\n", + csb_addr, pid_vnr(pid)); + + clear_siginfo(&info); + info.si_signo = SIGSEGV; + info.si_errno = EFAULT; + info.si_code = SEGV_MAPERR; + info.si_addr = csb_addr; + + /* + * process will be polling on csb.flags after request is sent to + * NX. So generally CSB update should not fail except when an + * application does not follow the process properly. So an error + * message will be displayed and leave it to user space whether + * to ignore or handle this signal. + */ + rcu_read_lock(); + rc = kill_pid_info(SIGSEGV, &info, pid); + rcu_read_unlock(); + + pr_devel("%s(): pid %d kill_proc_info() rc %d\n", __func__, + pid_vnr(pid), rc); +} + +/* * Process CRBs that we receive on the fault window. */ irqreturn_t vas_fault_handler(int irq, void *data) @@ -104,6 +219,7 @@ irqreturn_t vas_fault_handler(int irq, void *data) return IRQ_HANDLED; } + update_csb(window, crb); } while (true); return IRQ_HANDLED;