diff mbox series

[v5,3/4] um: add a UML specific futex implementation

Message ID 20201214181459.11096-4-anton.ivanov@cambridgegreys.com
State Superseded
Headers show
Series [v5,1/4] um: Add support for host CPU flags and alignment | expand

Commit Message

Anton Ivanov Dec. 14, 2020, 6:14 p.m. UTC
From: Anton Ivanov <anton.ivanov@cambridgegreys.com>

The generic asm futex implementation emulates atomic access to
memory by doing a get_user followed by put_user. These translate
to two mapping operations on UML with paging enabled in the
meantime. This, in turn may end up changing interrupts,
invoking the signal loop, etc.

This replaces the generic implementation by a mapping followed
by an operation on the mapped segment.

Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
---
 arch/um/include/asm/Kbuild    |   1 -
 arch/um/include/asm/futex.h   |  14 ++++
 arch/um/kernel/skas/uaccess.c | 130 ++++++++++++++++++++++++++++++++++
 3 files changed, 144 insertions(+), 1 deletion(-)
 create mode 100644 arch/um/include/asm/futex.h

Comments

Anton Ivanov Dec. 15, 2020, 9:43 a.m. UTC | #1
On 14/12/2020 18:14, anton.ivanov@cambridgegreys.com wrote:
> From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>
> The generic asm futex implementation emulates atomic access to
> memory by doing a get_user followed by put_user. These translate
> to two mapping operations on UML with paging enabled in the
> meantime. This, in turn may end up changing interrupts,
> invoking the signal loop, etc.
>
> This replaces the generic implementation by a mapping followed
> by an operation on the mapped segment.
>
> Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
> ---
>   arch/um/include/asm/Kbuild    |   1 -
>   arch/um/include/asm/futex.h   |  14 ++++
>   arch/um/kernel/skas/uaccess.c | 130 ++++++++++++++++++++++++++++++++++
>   3 files changed, 144 insertions(+), 1 deletion(-)
>   create mode 100644 arch/um/include/asm/futex.h
>
> diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild
> index 1c63b260ecc4..873e6815bc50 100644
> --- a/arch/um/include/asm/Kbuild
> +++ b/arch/um/include/asm/Kbuild
> @@ -8,7 +8,6 @@ generic-y += emergency-restart.h
>   generic-y += exec.h
>   generic-y += extable.h
>   generic-y += ftrace.h
> -generic-y += futex.h
>   generic-y += hw_irq.h
>   generic-y += irq_regs.h
>   generic-y += irq_work.h
> diff --git a/arch/um/include/asm/futex.h b/arch/um/include/asm/futex.h
> new file mode 100644
> index 000000000000..0a01f1f60de0
> --- /dev/null
> +++ b/arch/um/include/asm/futex.h
> @@ -0,0 +1,14 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef _ASM_GENERIC_FUTEX_H
> +#define _ASM_GENERIC_FUTEX_H

^^^^ Uggghhh

> +
> +#include <linux/futex.h>
> +#include <linux/uaccess.h>
> +#include <asm/errno.h>
> +
> +
> +extern int arch_futex_atomic_op_inuser(int op, u32 oparg, int *oval, u32 __user *uaddr);
> +extern int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
> +			      u32 oldval, u32 newval);
^^^^ Did not address Johannes comments, will do in v6
> +
> +#endif
> diff --git a/arch/um/kernel/skas/uaccess.c b/arch/um/kernel/skas/uaccess.c
> index 2dec915abe6f..85022e91bf53 100644
> --- a/arch/um/kernel/skas/uaccess.c
> +++ b/arch/um/kernel/skas/uaccess.c
> @@ -11,6 +11,7 @@
>   #include <asm/current.h>
>   #include <asm/page.h>
>   #include <kern_util.h>
> +#include <asm/futex.h>
>   #include <os.h>
>   
>   pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr)
> @@ -248,3 +249,132 @@ long __strnlen_user(const void __user *str, long len)
>   	return 0;
>   }
>   EXPORT_SYMBOL(__strnlen_user);
> +
> +/**
> + * arch_futex_atomic_op_inuser() - Atomic arithmetic operation with constant
> + *			  argument and comparison of the previous
> + *			  futex value with another constant.
> + *
> + * @encoded_op:	encoded operation to execute
> + * @uaddr:	pointer to user space address
> + *
> + * Return:
> + * 0 - On success
> + * -EFAULT - User access resulted in a page fault
> + * -EAGAIN - Atomic operation was unable to complete due to contention
> + * -ENOSYS - Operation not supported
> + */
> +
> +int arch_futex_atomic_op_inuser(int op, u32 oparg, int *oval, u32 __user *uaddr)
> +{
> +	int oldval, ret;
> +	struct page *page;
> +	unsigned long addr = (unsigned long) uaddr;
> +	pte_t *pte;
> +
> +	ret = -EFAULT;
> +	if (!access_ok(uaddr, sizeof(*uaddr)))
> +		return -EFAULT;
> +	preempt_disable();
> +	pte = maybe_map(addr, 1);
> +	if (pte == NULL)
> +		goto out_inuser;
> +
> +	page = pte_page(*pte);
> +#ifdef CONFIG_64BIT
> +	pagefault_disable();
> +	addr = (unsigned long) page_address(page) +
> +			(((unsigned long) addr) & ~PAGE_MASK);
> +#else
> +	addr = (unsigned long) kmap_atomic(page) +
> +		((unsigned long) addr & ~PAGE_MASK);
> +#endif
> +	uaddr = (u32 *) addr;
> +	oldval = *uaddr;
> +
> +	ret = 0;
> +
> +	switch (op) {
> +	case FUTEX_OP_SET:
> +		*uaddr = oparg;
> +		break;
> +	case FUTEX_OP_ADD:
> +		*uaddr += oparg;
> +		break;
> +	case FUTEX_OP_OR:
> +		*uaddr |= oparg;
> +		break;
> +	case FUTEX_OP_ANDN:
> +		*uaddr &= ~oparg;
> +		break;
> +	case FUTEX_OP_XOR:
> +		*uaddr ^= oparg;
> +		break;
> +	default:
> +		ret = -ENOSYS;
> +	}
> +#ifdef CONFIG_64BIT
> +	pagefault_enable();
> +#else
> +	kunmap_atomic((void *)addr);
> +#endif
> +
> +out_inuser:
> +	preempt_enable();
> +
> +	if (ret == 0)
> +		*oval = oldval;
> +
> +	return ret;
> +}
> +EXPORT_SYMBOL(arch_futex_atomic_op_inuser);
> +
> +/**
> + * futex_atomic_cmpxchg_inatomic() - Compare and exchange the content of the
> + *				uaddr with newval if the current value is
> + *				oldval.
> + * @uval:	pointer to store content of @uaddr
> + * @uaddr:	pointer to user space address
> + * @oldval:	old value
> + * @newval:	new value to store to @uaddr
> + *
> + * Return:
> + * 0 - On success
> + * -EFAULT - User access resulted in a page fault
> + * -EAGAIN - Atomic operation was unable to complete due to contention
> + * -ENOSYS - Function not implemented (only if !HAVE_FUTEX_CMPXCHG)
> + */
> +
> +int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
> +			      u32 oldval, u32 newval)
> +{
> +	u32 val;
> +	struct page *page;
> +	pte_t *pte;
> +	int ret = -EFAULT;
> +
> +	if (!access_ok(uaddr, sizeof(*uaddr)))
> +		return -EFAULT;
> +
> +	preempt_disable();
> +	pte = maybe_map((unsigned long) uaddr, 1);
> +	if (pte == NULL)
> +		goto out_inatomic;
> +
> +	page = pte_page(*pte);
> +	pagefault_disable();
> +	uaddr = page_address(page) + (((unsigned long) uaddr) & ~PAGE_MASK);
^^^ missing 32 bit case, sending v6 with fix shortly.
> +
> +	val = *uaddr;
> +
> +	ret = cmpxchg(uaddr, oldval, newval);
> +
> +	*uval = val;
> +	pagefault_enable();
> +	ret = 0;
> +
> +out_inatomic:
> +	preempt_enable();
> +	return ret;
> +}
> +EXPORT_SYMBOL(futex_atomic_cmpxchg_inatomic);
Richard Weinberger Dec. 15, 2020, 9:46 a.m. UTC | #2
Anton,

----- Ursprüngliche Mail -----
> Von: "anton ivanov" <anton.ivanov@cambridgegreys.com>
> An: "linux-um" <linux-um@lists.infradead.org>
> CC: "richard" <richard@nod.at>
> Gesendet: Dienstag, 15. Dezember 2020 10:43:48
> Betreff: Re: [PATCH v5 3/4] um: add a UML specific futex implementation

> On 14/12/2020 18:14, anton.ivanov@cambridgegreys.com wrote:
>> From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>>
>> The generic asm futex implementation emulates atomic access to
>> memory by doing a get_user followed by put_user. These translate
>> to two mapping operations on UML with paging enabled in the
>> meantime. This, in turn may end up changing interrupts,
>> invoking the signal loop, etc.
>>
>> This replaces the generic implementation by a mapping followed
>> by an operation on the mapped segment.
>>
>> Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>> ---
>>   arch/um/include/asm/Kbuild    |   1 -
>>   arch/um/include/asm/futex.h   |  14 ++++
>>   arch/um/kernel/skas/uaccess.c | 130 ++++++++++++++++++++++++++++++++++
>>   3 files changed, 144 insertions(+), 1 deletion(-)
>>   create mode 100644 arch/um/include/asm/futex.h
>>
>> diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild
>> index 1c63b260ecc4..873e6815bc50 100644
>> --- a/arch/um/include/asm/Kbuild
>> +++ b/arch/um/include/asm/Kbuild
>> @@ -8,7 +8,6 @@ generic-y += emergency-restart.h
>>   generic-y += exec.h
>>   generic-y += extable.h
>>   generic-y += ftrace.h
>> -generic-y += futex.h
>>   generic-y += hw_irq.h
>>   generic-y += irq_regs.h
>>   generic-y += irq_work.h
>> diff --git a/arch/um/include/asm/futex.h b/arch/um/include/asm/futex.h
>> new file mode 100644
>> index 000000000000..0a01f1f60de0
>> --- /dev/null
>> +++ b/arch/um/include/asm/futex.h
>> @@ -0,0 +1,14 @@
>> +/* SPDX-License-Identifier: GPL-2.0 */
>> +#ifndef _ASM_GENERIC_FUTEX_H
>> +#define _ASM_GENERIC_FUTEX_H
> 
> ^^^^ Uggghhh
> 
>> +
>> +#include <linux/futex.h>
>> +#include <linux/uaccess.h>
>> +#include <asm/errno.h>
>> +
>> +
>> +extern int arch_futex_atomic_op_inuser(int op, u32 oparg, int *oval, u32 __user
>> *uaddr);
>> +extern int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
>> +			      u32 oldval, u32 newval);
> ^^^^ Did not address Johannes comments, will do in v6

No need to hurry. This has to wait anyway for 5.12. :-)

Thanks,
//richard
Anton Ivanov Dec. 15, 2020, 9:56 a.m. UTC | #3
On 15/12/2020 09:46, Richard Weinberger wrote:
> Anton,
>
> ----- Ursprüngliche Mail -----
>> Von: "anton ivanov" <anton.ivanov@cambridgegreys.com>
>> An: "linux-um" <linux-um@lists.infradead.org>
>> CC: "richard" <richard@nod.at>
>> Gesendet: Dienstag, 15. Dezember 2020 10:43:48
>> Betreff: Re: [PATCH v5 3/4] um: add a UML specific futex implementation
>> On 14/12/2020 18:14, anton.ivanov@cambridgegreys.com wrote:
>>> From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>>>
>>> The generic asm futex implementation emulates atomic access to
>>> memory by doing a get_user followed by put_user. These translate
>>> to two mapping operations on UML with paging enabled in the
>>> meantime. This, in turn may end up changing interrupts,
>>> invoking the signal loop, etc.
>>>
>>> This replaces the generic implementation by a mapping followed
>>> by an operation on the mapped segment.
>>>
>>> Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>>> ---
>>>    arch/um/include/asm/Kbuild    |   1 -
>>>    arch/um/include/asm/futex.h   |  14 ++++
>>>    arch/um/kernel/skas/uaccess.c | 130 ++++++++++++++++++++++++++++++++++
>>>    3 files changed, 144 insertions(+), 1 deletion(-)
>>>    create mode 100644 arch/um/include/asm/futex.h
>>>
>>> diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild
>>> index 1c63b260ecc4..873e6815bc50 100644
>>> --- a/arch/um/include/asm/Kbuild
>>> +++ b/arch/um/include/asm/Kbuild
>>> @@ -8,7 +8,6 @@ generic-y += emergency-restart.h
>>>    generic-y += exec.h
>>>    generic-y += extable.h
>>>    generic-y += ftrace.h
>>> -generic-y += futex.h
>>>    generic-y += hw_irq.h
>>>    generic-y += irq_regs.h
>>>    generic-y += irq_work.h
>>> diff --git a/arch/um/include/asm/futex.h b/arch/um/include/asm/futex.h
>>> new file mode 100644
>>> index 000000000000..0a01f1f60de0
>>> --- /dev/null
>>> +++ b/arch/um/include/asm/futex.h
>>> @@ -0,0 +1,14 @@
>>> +/* SPDX-License-Identifier: GPL-2.0 */
>>> +#ifndef _ASM_GENERIC_FUTEX_H
>>> +#define _ASM_GENERIC_FUTEX_H
>> ^^^^ Uggghhh
>>
>>> +
>>> +#include <linux/futex.h>
>>> +#include <linux/uaccess.h>
>>> +#include <asm/errno.h>
>>> +
>>> +
>>> +extern int arch_futex_atomic_op_inuser(int op, u32 oparg, int *oval, u32 __user
>>> *uaddr);
>>> +extern int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
>>> +			      u32 oldval, u32 newval);
>> ^^^^ Did not address Johannes comments, will do in v6
> No need to hurry. This has to wait anyway for 5.12. :-)

That's fine :)

I wanted to get it out of the way and get back to eBPF.

I am trying to get my head around on how to reuse as much as possible from the vector code to build an AF_XDP multi-port netdev and a switchdev on top of that. Some of it is like pulling teeth without anesthetic. The documentation leaves a lot to be desired :)

Compared to that some performance polishing was a nice (and fairly pleasant) distraction.

I think it is close to where it should be now. It passes all benchmarks including multi-threaded userspace ones on both 32 and 64.

One thing which we should probably consider is on/off-ing the display of the CPU flags acquired from the host via a kernel command line switch. At present they are reflected 1:1 into the UML instance.

>
> Thanks,
> //richard
>
diff mbox series

Patch

diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild
index 1c63b260ecc4..873e6815bc50 100644
--- a/arch/um/include/asm/Kbuild
+++ b/arch/um/include/asm/Kbuild
@@ -8,7 +8,6 @@  generic-y += emergency-restart.h
 generic-y += exec.h
 generic-y += extable.h
 generic-y += ftrace.h
-generic-y += futex.h
 generic-y += hw_irq.h
 generic-y += irq_regs.h
 generic-y += irq_work.h
diff --git a/arch/um/include/asm/futex.h b/arch/um/include/asm/futex.h
new file mode 100644
index 000000000000..0a01f1f60de0
--- /dev/null
+++ b/arch/um/include/asm/futex.h
@@ -0,0 +1,14 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_GENERIC_FUTEX_H
+#define _ASM_GENERIC_FUTEX_H
+
+#include <linux/futex.h>
+#include <linux/uaccess.h>
+#include <asm/errno.h>
+
+
+extern int arch_futex_atomic_op_inuser(int op, u32 oparg, int *oval, u32 __user *uaddr);
+extern int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
+			      u32 oldval, u32 newval);
+
+#endif
diff --git a/arch/um/kernel/skas/uaccess.c b/arch/um/kernel/skas/uaccess.c
index 2dec915abe6f..85022e91bf53 100644
--- a/arch/um/kernel/skas/uaccess.c
+++ b/arch/um/kernel/skas/uaccess.c
@@ -11,6 +11,7 @@ 
 #include <asm/current.h>
 #include <asm/page.h>
 #include <kern_util.h>
+#include <asm/futex.h>
 #include <os.h>
 
 pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr)
@@ -248,3 +249,132 @@  long __strnlen_user(const void __user *str, long len)
 	return 0;
 }
 EXPORT_SYMBOL(__strnlen_user);
+
+/**
+ * arch_futex_atomic_op_inuser() - Atomic arithmetic operation with constant
+ *			  argument and comparison of the previous
+ *			  futex value with another constant.
+ *
+ * @encoded_op:	encoded operation to execute
+ * @uaddr:	pointer to user space address
+ *
+ * Return:
+ * 0 - On success
+ * -EFAULT - User access resulted in a page fault
+ * -EAGAIN - Atomic operation was unable to complete due to contention
+ * -ENOSYS - Operation not supported
+ */
+
+int arch_futex_atomic_op_inuser(int op, u32 oparg, int *oval, u32 __user *uaddr)
+{
+	int oldval, ret;
+	struct page *page;
+	unsigned long addr = (unsigned long) uaddr;
+	pte_t *pte;
+
+	ret = -EFAULT;
+	if (!access_ok(uaddr, sizeof(*uaddr)))
+		return -EFAULT;
+	preempt_disable();
+	pte = maybe_map(addr, 1);
+	if (pte == NULL)
+		goto out_inuser;
+
+	page = pte_page(*pte);
+#ifdef CONFIG_64BIT
+	pagefault_disable();
+	addr = (unsigned long) page_address(page) +
+			(((unsigned long) addr) & ~PAGE_MASK);
+#else
+	addr = (unsigned long) kmap_atomic(page) +
+		((unsigned long) addr & ~PAGE_MASK);
+#endif
+	uaddr = (u32 *) addr;
+	oldval = *uaddr;
+
+	ret = 0;
+
+	switch (op) {
+	case FUTEX_OP_SET:
+		*uaddr = oparg;
+		break;
+	case FUTEX_OP_ADD:
+		*uaddr += oparg;
+		break;
+	case FUTEX_OP_OR:
+		*uaddr |= oparg;
+		break;
+	case FUTEX_OP_ANDN:
+		*uaddr &= ~oparg;
+		break;
+	case FUTEX_OP_XOR:
+		*uaddr ^= oparg;
+		break;
+	default:
+		ret = -ENOSYS;
+	}
+#ifdef CONFIG_64BIT
+	pagefault_enable();
+#else
+	kunmap_atomic((void *)addr);
+#endif
+
+out_inuser:
+	preempt_enable();
+
+	if (ret == 0)
+		*oval = oldval;
+
+	return ret;
+}
+EXPORT_SYMBOL(arch_futex_atomic_op_inuser);
+
+/**
+ * futex_atomic_cmpxchg_inatomic() - Compare and exchange the content of the
+ *				uaddr with newval if the current value is
+ *				oldval.
+ * @uval:	pointer to store content of @uaddr
+ * @uaddr:	pointer to user space address
+ * @oldval:	old value
+ * @newval:	new value to store to @uaddr
+ *
+ * Return:
+ * 0 - On success
+ * -EFAULT - User access resulted in a page fault
+ * -EAGAIN - Atomic operation was unable to complete due to contention
+ * -ENOSYS - Function not implemented (only if !HAVE_FUTEX_CMPXCHG)
+ */
+
+int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
+			      u32 oldval, u32 newval)
+{
+	u32 val;
+	struct page *page;
+	pte_t *pte;
+	int ret = -EFAULT;
+
+	if (!access_ok(uaddr, sizeof(*uaddr)))
+		return -EFAULT;
+
+	preempt_disable();
+	pte = maybe_map((unsigned long) uaddr, 1);
+	if (pte == NULL)
+		goto out_inatomic;
+
+	page = pte_page(*pte);
+	pagefault_disable();
+	uaddr = page_address(page) + (((unsigned long) uaddr) & ~PAGE_MASK);
+
+	val = *uaddr;
+
+	ret = cmpxchg(uaddr, oldval, newval);
+
+	*uval = val;
+	pagefault_enable();
+	ret = 0;
+
+out_inatomic:
+	preempt_enable();
+	return ret;
+}
+EXPORT_SYMBOL(futex_atomic_cmpxchg_inatomic);