diff mbox series

[V5,09/14] powerpc/vas: Update CSB and notify process for fault CRBs

Message ID 1579681061.26081.48.camel@hbabu-laptop (mailing list archive)
State Superseded
Headers show
Series powerpc/vas: Page fault handling for user space NX requests | expand

Checks

Context Check Description
snowpatch_ozlabs/apply_patch success Successfully applied on branch powerpc/merge (20862247a368dbb75d6e97d82345999adaacf3cc)
snowpatch_ozlabs/checkpatch warning total: 0 errors, 0 warnings, 3 checks, 134 lines checked
snowpatch_ozlabs/needsstable success Patch has no Fixes tags

Commit Message

Haren Myneni Jan. 22, 2020, 8:17 a.m. UTC
For each fault CRB, update fault address in CRB (fault_storage_addr)
and translation error status in CSB so that user space can touch the
fault address and resend the request. If the user space passed invalid
CSB address send signal to process with SIGSEGV.

Signed-off-by: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Signed-off-by: Haren Myneni <haren@linux.ibm.com>
---
 arch/powerpc/platforms/powernv/vas-fault.c | 116 +++++++++++++++++++++++++++++
 1 file changed, 116 insertions(+)

Comments

Michael Neuling Feb. 7, 2020, 5:46 a.m. UTC | #1
On Wed, 2020-01-22 at 00:17 -0800, Haren Myneni wrote:
> For each fault CRB, update fault address in CRB (fault_storage_addr)
> and translation error status in CSB so that user space can touch the
> fault address and resend the request. If the user space passed invalid
> CSB address send signal to process with SIGSEGV.
> 
> Signed-off-by: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
> Signed-off-by: Haren Myneni <haren@linux.ibm.com>
> ---
>  arch/powerpc/platforms/powernv/vas-fault.c | 116
> +++++++++++++++++++++++++++++
>  1 file changed, 116 insertions(+)
> 
> diff --git a/arch/powerpc/platforms/powernv/vas-fault.c
> b/arch/powerpc/platforms/powernv/vas-fault.c
> index 5c2cada..2cfab0c 100644
> --- a/arch/powerpc/platforms/powernv/vas-fault.c
> +++ b/arch/powerpc/platforms/powernv/vas-fault.c
> @@ -11,6 +11,7 @@
>  #include <linux/slab.h>
>  #include <linux/uaccess.h>
>  #include <linux/kthread.h>
> +#include <linux/sched/signal.h>
>  #include <linux/mmu_context.h>
>  #include <asm/icswx.h>
>  
> @@ -26,6 +27,120 @@
>  #define VAS_FAULT_WIN_FIFO_SIZE	(4 << 20)
>  
>  /*
> + * Update the CSB to indicate a translation error.
> + *
> + * If the fault is in the CSB address itself or if we are unable to
> + * update the CSB, send a signal to the process, because we have no
> + * other way of notifying the user process.
> + *
> + * Remaining settings in the CSB are based on wait_for_csb() of
> + * NX-GZIP.
> + */
> +static void update_csb(struct vas_window *window,
> +			struct coprocessor_request_block *crb)
> +{
> +	int rc;
> +	struct pid *pid;
> +	void __user *csb_addr;
> +	struct task_struct *tsk;
> +	struct kernel_siginfo info;
> +	struct coprocessor_status_block csb;
> +
> +	/*
> +	 * NX user space windows can not be opened for task->mm=NULL
> +	 * and faults will not be generated for kernel requests.
> +	 */
> +	if (!window->mm || !window->user_win)
> +		return;
> +
> +	csb_addr = (void *)be64_to_cpu(crb->csb_addr);
> +
> +	csb.cc = CSB_CC_TRANSLATION;
> +	csb.ce = CSB_CE_TERMINATION;
> +	csb.cs = 0;
> +	csb.count = 0;
> +
> +	/*
> +	 * Returns the fault address in CPU format since it is passed with
> +	 * signal. But if the user space expects BE format, need changes.
> +	 * i.e either kernel (here) or user should convert to CPU format.
> +	 * Not both!
> +	 */
> +	csb.address = be64_to_cpu(crb->stamp.nx.fault_storage_addr);

This looks wrong and I don't understand the comment. You need to convert this
back to be64 to write it to csb.address. ie.

  csb.address = cpu_to_be64(be64_to_cpu(crb->stamp.nx.fault_storage_addr));

Which I think you can just avoid the endian conversion all together.

> +	csb.flags = 0;
> +
> +	pid = window->pid;
> +	tsk = get_pid_task(pid, PIDTYPE_PID);
> +	/*
> +	 * Send window will be closed after processing all NX requests
> +	 * and process exits after closing all windows. In multi-thread
> +	 * applications, thread may not exists, but does not close FD
> +	 * (means send window) upon exit. Parent thread (tgid) can use
> +	 * and close the window later.
> +	 * pid and mm references are taken when window is opened by
> +	 * process (pid). So tgid is used only when child thread opens
> +	 * a window and exits without closing it in multithread tasks.
> +	 */
> +	if (!tsk) {
> +		pid = window->tgid;
> +		tsk = get_pid_task(pid, PIDTYPE_PID);
> +		/*
> +		 * Parent thread will be closing window during its exit.
> +		 * So should not get here.
> +		 */
> +		if (!tsk)
> +			return;
> +	}
> +
> +	/* Return if the task is exiting. */
> +	if (tsk->flags & PF_EXITING) {
> +		put_task_struct(tsk);
> +		return;
> +	}
> +
> +	use_mm(window->mm);
> +	rc = copy_to_user(csb_addr, &csb, sizeof(csb));
> +	/*
> +	 * User space polls on csb.flags (first byte). So add barrier
> +	 * then copy first byte with csb flags update.
> +	 */
> +	smp_mb();
> +	if (!rc) {
> +		csb.flags = CSB_V;
> +		rc = copy_to_user(csb_addr, &csb, sizeof(u8));
> +	}
> +	unuse_mm(window->mm);
> +	put_task_struct(tsk);
> +
> +	/* Success */
> +	if (!rc)
> +		return;
> +
> +	pr_err("Invalid CSB address 0x%p signalling pid(%d)\n",
> +			csb_addr, pid_vnr(pid));

This is a userspace error, not a kernel error. This should not be a pr_err().

Userspace could spam the console with this.

> +
> +	clear_siginfo(&info);
> +	info.si_signo = SIGSEGV;
> +	info.si_errno = EFAULT;
> +	info.si_code = SEGV_MAPERR;
> +	info.si_addr = csb_addr;
> +
> +	/*
> +	 * process will be polling on csb.flags after request is sent to
> +	 * NX. So generally CSB update should not fail except when an
> +	 * application does not follow the process properly. So an error
> +	 * message will be displayed and leave it to user space whether
> +	 * to ignore or handle this signal.
> +	 */
> +	rcu_read_lock();
> +	rc = kill_pid_info(SIGSEGV, &info, pid);
> +	rcu_read_unlock();

why the rcu_read_un/lock() here?

> +
> +	pr_devel("%s(): pid %d kill_proc_info() rc %d\n", __func__,
> +			pid_vnr(pid), rc);
> +}
> +
> +/*
>   * Process CRBs that we receive on the fault window.
>   */
>  irqreturn_t vas_fault_handler(int irq, void *data)
> @@ -104,6 +219,7 @@ irqreturn_t vas_fault_handler(int irq, void *data)
>  			return IRQ_HANDLED;
>  		}
>  
> +		update_csb(window, crb);
>  	} while (true);
>  
>  	return IRQ_HANDLED;
Haren Myneni Feb. 10, 2020, 5:12 a.m. UTC | #2
Mikey, Thanks for your review comments.

On Fri, 2020-02-07 at 16:46 +1100, Michael Neuling wrote:
> On Wed, 2020-01-22 at 00:17 -0800, Haren Myneni wrote:
> > For each fault CRB, update fault address in CRB (fault_storage_addr)
> > and translation error status in CSB so that user space can touch the
> > fault address and resend the request. If the user space passed invalid
> > CSB address send signal to process with SIGSEGV.
> > 
> > Signed-off-by: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
> > Signed-off-by: Haren Myneni <haren@linux.ibm.com>
> > ---
> >  arch/powerpc/platforms/powernv/vas-fault.c | 116
> > +++++++++++++++++++++++++++++
> >  1 file changed, 116 insertions(+)
> > 
> > diff --git a/arch/powerpc/platforms/powernv/vas-fault.c
> > b/arch/powerpc/platforms/powernv/vas-fault.c
> > index 5c2cada..2cfab0c 100644
> > --- a/arch/powerpc/platforms/powernv/vas-fault.c
> > +++ b/arch/powerpc/platforms/powernv/vas-fault.c
> > @@ -11,6 +11,7 @@
> >  #include <linux/slab.h>
> >  #include <linux/uaccess.h>
> >  #include <linux/kthread.h>
> > +#include <linux/sched/signal.h>
> >  #include <linux/mmu_context.h>
> >  #include <asm/icswx.h>
> >  
> > @@ -26,6 +27,120 @@
> >  #define VAS_FAULT_WIN_FIFO_SIZE	(4 << 20)
> >  
> >  /*
> > + * Update the CSB to indicate a translation error.
> > + *
> > + * If the fault is in the CSB address itself or if we are unable to
> > + * update the CSB, send a signal to the process, because we have no
> > + * other way of notifying the user process.
> > + *
> > + * Remaining settings in the CSB are based on wait_for_csb() of
> > + * NX-GZIP.
> > + */
> > +static void update_csb(struct vas_window *window,
> > +			struct coprocessor_request_block *crb)
> > +{
> > +	int rc;
> > +	struct pid *pid;
> > +	void __user *csb_addr;
> > +	struct task_struct *tsk;
> > +	struct kernel_siginfo info;
> > +	struct coprocessor_status_block csb;
> > +
> > +	/*
> > +	 * NX user space windows can not be opened for task->mm=NULL
> > +	 * and faults will not be generated for kernel requests.
> > +	 */
> > +	if (!window->mm || !window->user_win)
> > +		return;
> > +
> > +	csb_addr = (void *)be64_to_cpu(crb->csb_addr);
> > +
> > +	csb.cc = CSB_CC_TRANSLATION;
> > +	csb.ce = CSB_CE_TERMINATION;
> > +	csb.cs = 0;
> > +	csb.count = 0;
> > +
> > +	/*
> > +	 * Returns the fault address in CPU format since it is passed with
> > +	 * signal. But if the user space expects BE format, need changes.
> > +	 * i.e either kernel (here) or user should convert to CPU format.
> > +	 * Not both!
> > +	 */
> > +	csb.address = be64_to_cpu(crb->stamp.nx.fault_storage_addr);
> 
> This looks wrong and I don't understand the comment. You need to convert this
> back to be64 to write it to csb.address. ie.
> 
>   csb.address = cpu_to_be64(be64_to_cpu(crb->stamp.nx.fault_storage_addr));
> 
> Which I think you can just avoid the endian conversion all together.

NX pastes fault CRB in big-endian, so passing this address in CPU format
to user space, otherwise the library has to convert. 

What is the standard way for passing to user space? 

> 
> > +	csb.flags = 0;
> > +
> > +	pid = window->pid;
> > +	tsk = get_pid_task(pid, PIDTYPE_PID);
> > +	/*
> > +	 * Send window will be closed after processing all NX requests
> > +	 * and process exits after closing all windows. In multi-thread
> > +	 * applications, thread may not exists, but does not close FD
> > +	 * (means send window) upon exit. Parent thread (tgid) can use
> > +	 * and close the window later.
> > +	 * pid and mm references are taken when window is opened by
> > +	 * process (pid). So tgid is used only when child thread opens
> > +	 * a window and exits without closing it in multithread tasks.
> > +	 */
> > +	if (!tsk) {
> > +		pid = window->tgid;
> > +		tsk = get_pid_task(pid, PIDTYPE_PID);
> > +		/*
> > +		 * Parent thread will be closing window during its exit.
> > +		 * So should not get here.
> > +		 */
> > +		if (!tsk)
> > +			return;
> > +	}
> > +
> > +	/* Return if the task is exiting. */
> > +	if (tsk->flags & PF_EXITING) {
> > +		put_task_struct(tsk);
> > +		return;
> > +	}
> > +
> > +	use_mm(window->mm);
> > +	rc = copy_to_user(csb_addr, &csb, sizeof(csb));
> > +	/*
> > +	 * User space polls on csb.flags (first byte). So add barrier
> > +	 * then copy first byte with csb flags update.
> > +	 */
> > +	smp_mb();
> > +	if (!rc) {
> > +		csb.flags = CSB_V;
> > +		rc = copy_to_user(csb_addr, &csb, sizeof(u8));
> > +	}
> > +	unuse_mm(window->mm);
> > +	put_task_struct(tsk);
> > +
> > +	/* Success */
> > +	if (!rc)
> > +		return;
> > +
> > +	pr_err("Invalid CSB address 0x%p signalling pid(%d)\n",
> > +			csb_addr, pid_vnr(pid));
> 
> This is a userspace error, not a kernel error. This should not be a pr_err().
> 
> Userspace could spam the console with this.

Will change it to pr_debug/info. Added pr_err() during development and
missed to remove. 
> 
> > +
> > +	clear_siginfo(&info);
> > +	info.si_signo = SIGSEGV;
> > +	info.si_errno = EFAULT;
> > +	info.si_code = SEGV_MAPERR;
> > +	info.si_addr = csb_addr;
> > +
> > +	/*
> > +	 * process will be polling on csb.flags after request is sent to
> > +	 * NX. So generally CSB update should not fail except when an
> > +	 * application does not follow the process properly. So an error
> > +	 * message will be displayed and leave it to user space whether
> > +	 * to ignore or handle this signal.
> > +	 */
> > +	rcu_read_lock();
> > +	rc = kill_pid_info(SIGSEGV, &info, pid);
> > +	rcu_read_unlock();
> 
> why the rcu_read_un/lock() here?

Used same as in kill_proc_info()/kill_something_info()
> 
> > +
> > +	pr_devel("%s(): pid %d kill_proc_info() rc %d\n", __func__,
> > +			pid_vnr(pid), rc);
> > +}
> > +
> > +/*
> >   * Process CRBs that we receive on the fault window.
> >   */
> >  irqreturn_t vas_fault_handler(int irq, void *data)
> > @@ -104,6 +219,7 @@ irqreturn_t vas_fault_handler(int irq, void *data)
> >  			return IRQ_HANDLED;
> >  		}
> >  
> > +		update_csb(window, crb);
> >  	} while (true);
> >  
> >  	return IRQ_HANDLED;
>
Michael Neuling Feb. 10, 2020, 9:25 a.m. UTC | #3
> > > +
> > > +	csb.cc = CSB_CC_TRANSLATION;
> > > +	csb.ce = CSB_CE_TERMINATION;
> > > +	csb.cs = 0;
> > > +	csb.count = 0;
> > > +
> > > +	/*
> > > +	 * Returns the fault address in CPU format since it is passed with
> > > +	 * signal. But if the user space expects BE format, need changes.
> > > +	 * i.e either kernel (here) or user should convert to CPU format.
> > > +	 * Not both!
> > > +	 */
> > > +	csb.address = be64_to_cpu(crb->stamp.nx.fault_storage_addr);
> > 
> > This looks wrong and I don't understand the comment. You need to convert
> > this
> > back to be64 to write it to csb.address. ie.
> > 
> >   csb.address = cpu_to_be64(be64_to_cpu(crb->stamp.nx.fault_storage_addr));
> > 
> > Which I think you can just avoid the endian conversion all together.
> 
> NX pastes fault CRB in big-endian, so passing this address in CPU format
> to user space, otherwise the library has to convert. 

OK, then please change the definition in struct coprocessor_status_block to just
__u64.

struct coprocessor_status_block {
	u8 flags;
	u8 cs;
	u8 cc;
	u8 ce;
	__be32 count;
	__be64 address;
} __packed __aligned(CSB_ALIGN);

Big but....

I thought "struct coprocessor_status_block" was also written by hardware. If
that's the case then it needs to be __be64 and you need the kernel to synthesize
exactly what the hardware is doing. Hence the struct definition is correct and
the kernel needs to convert to _be64 on writing. 

> What is the standard way for passing to user space? 

CPU endian.

> > > +	 * process will be polling on csb.flags after request is sent to
> > > +	 * NX. So generally CSB update should not fail except when an
> > > +	 * application does not follow the process properly. So an error
> > > +	 * message will be displayed and leave it to user space whether
> > > +	 * to ignore or handle this signal.
> > > +	 */
> > > +	rcu_read_lock();
> > > +	rc = kill_pid_info(SIGSEGV, &info, pid);
> > > +	rcu_read_unlock();
> > 
> > why the rcu_read_un/lock() here?
> 
> Used same as in kill_proc_info()/kill_something_info()

Please document.

Mikey
diff mbox series

Patch

diff --git a/arch/powerpc/platforms/powernv/vas-fault.c b/arch/powerpc/platforms/powernv/vas-fault.c
index 5c2cada..2cfab0c 100644
--- a/arch/powerpc/platforms/powernv/vas-fault.c
+++ b/arch/powerpc/platforms/powernv/vas-fault.c
@@ -11,6 +11,7 @@ 
 #include <linux/slab.h>
 #include <linux/uaccess.h>
 #include <linux/kthread.h>
+#include <linux/sched/signal.h>
 #include <linux/mmu_context.h>
 #include <asm/icswx.h>
 
@@ -26,6 +27,120 @@ 
 #define VAS_FAULT_WIN_FIFO_SIZE	(4 << 20)
 
 /*
+ * Update the CSB to indicate a translation error.
+ *
+ * If the fault is in the CSB address itself or if we are unable to
+ * update the CSB, send a signal to the process, because we have no
+ * other way of notifying the user process.
+ *
+ * Remaining settings in the CSB are based on wait_for_csb() of
+ * NX-GZIP.
+ */
+static void update_csb(struct vas_window *window,
+			struct coprocessor_request_block *crb)
+{
+	int rc;
+	struct pid *pid;
+	void __user *csb_addr;
+	struct task_struct *tsk;
+	struct kernel_siginfo info;
+	struct coprocessor_status_block csb;
+
+	/*
+	 * NX user space windows can not be opened for task->mm=NULL
+	 * and faults will not be generated for kernel requests.
+	 */
+	if (!window->mm || !window->user_win)
+		return;
+
+	csb_addr = (void *)be64_to_cpu(crb->csb_addr);
+
+	csb.cc = CSB_CC_TRANSLATION;
+	csb.ce = CSB_CE_TERMINATION;
+	csb.cs = 0;
+	csb.count = 0;
+
+	/*
+	 * Returns the fault address in CPU format since it is passed with
+	 * signal. But if the user space expects BE format, need changes.
+	 * i.e either kernel (here) or user should convert to CPU format.
+	 * Not both!
+	 */
+	csb.address = be64_to_cpu(crb->stamp.nx.fault_storage_addr);
+	csb.flags = 0;
+
+	pid = window->pid;
+	tsk = get_pid_task(pid, PIDTYPE_PID);
+	/*
+	 * Send window will be closed after processing all NX requests
+	 * and process exits after closing all windows. In multi-thread
+	 * applications, thread may not exists, but does not close FD
+	 * (means send window) upon exit. Parent thread (tgid) can use
+	 * and close the window later.
+	 * pid and mm references are taken when window is opened by
+	 * process (pid). So tgid is used only when child thread opens
+	 * a window and exits without closing it in multithread tasks.
+	 */
+	if (!tsk) {
+		pid = window->tgid;
+		tsk = get_pid_task(pid, PIDTYPE_PID);
+		/*
+		 * Parent thread will be closing window during its exit.
+		 * So should not get here.
+		 */
+		if (!tsk)
+			return;
+	}
+
+	/* Return if the task is exiting. */
+	if (tsk->flags & PF_EXITING) {
+		put_task_struct(tsk);
+		return;
+	}
+
+	use_mm(window->mm);
+	rc = copy_to_user(csb_addr, &csb, sizeof(csb));
+	/*
+	 * User space polls on csb.flags (first byte). So add barrier
+	 * then copy first byte with csb flags update.
+	 */
+	smp_mb();
+	if (!rc) {
+		csb.flags = CSB_V;
+		rc = copy_to_user(csb_addr, &csb, sizeof(u8));
+	}
+	unuse_mm(window->mm);
+	put_task_struct(tsk);
+
+	/* Success */
+	if (!rc)
+		return;
+
+	pr_err("Invalid CSB address 0x%p signalling pid(%d)\n",
+			csb_addr, pid_vnr(pid));
+
+	clear_siginfo(&info);
+	info.si_signo = SIGSEGV;
+	info.si_errno = EFAULT;
+	info.si_code = SEGV_MAPERR;
+	info.si_addr = csb_addr;
+
+	/*
+	 * process will be polling on csb.flags after request is sent to
+	 * NX. So generally CSB update should not fail except when an
+	 * application does not follow the process properly. So an error
+	 * message will be displayed and leave it to user space whether
+	 * to ignore or handle this signal.
+	 */
+	rcu_read_lock();
+	rc = kill_pid_info(SIGSEGV, &info, pid);
+	rcu_read_unlock();
+
+	pr_devel("%s(): pid %d kill_proc_info() rc %d\n", __func__,
+			pid_vnr(pid), rc);
+}
+
+/*
  * Process CRBs that we receive on the fault window.
  */
 irqreturn_t vas_fault_handler(int irq, void *data)
@@ -104,6 +219,7 @@  irqreturn_t vas_fault_handler(int irq, void *data)
 			return IRQ_HANDLED;
 		}
 
+		update_csb(window, crb);
 	} while (true);
 
 	return IRQ_HANDLED;