diff mbox series

powerpc/uaccess: Use flexible addressing with __put_user()/__get_user()

Message ID 4fdc2aba6f5e51887d1cd0fee94be0989eada2cd.1586942312.git.christophe.leroy@c-s.fr
State Superseded
Headers show
Series powerpc/uaccess: Use flexible addressing with __put_user()/__get_user() | expand

Checks

Context Check Description
snowpatch_ozlabs/apply_patch success Successfully applied on branch powerpc/merge (a9aa21d05c33c556e48c5062b6632a9b94906570)
snowpatch_ozlabs/build-ppc64le success Build succeeded
snowpatch_ozlabs/build-ppc64be success Build succeeded
snowpatch_ozlabs/build-ppc64e success Build succeeded
snowpatch_ozlabs/build-pmac32 warning Upstream build failed, couldn't test patch
snowpatch_ozlabs/checkpatch warning total: 4 errors, 2 warnings, 1 checks, 82 lines checked
snowpatch_ozlabs/needsstable success Patch has no Fixes tags

Commit Message

Christophe Leroy April 15, 2020, 9:20 a.m. UTC
At the time being, __put_user()/__get_user() and friends only use
register indirect with immediate index addressing, with the index
set to 0. Ex:

	lwz	reg1, 0(reg2)

Give the compiler the opportunity to use other adressing modes
whenever possible, to get more optimised code.

Hereunder is a small exemple:

struct test {
	u32 item1;
	u16 item2;
	u8 item3;
	u64 item4;
};

int set_test_user(struct test __user *from, struct test __user *to, int idx)
{
	int err;
	u32 item1;
	u16 item2;
	u8 item3;
	u64 item4;

	err = __get_user(item1, &from->item1);
	err |= __get_user(item2, &from->item2);
	err |= __get_user(item3, &from->item3);
	err |= __get_user(item4, &from->item4);

	err |= __put_user(item1, &to->item1);
	err |= __put_user(item2, &to->item2);
	err |= __put_user(item3, &to->item3);
	err |= __put_user(item4, &to->item4);

	return err;
}

Before the patch:

00000df0 <set_test_user>:
 df0:	94 21 ff f0 	stwu    r1,-16(r1)
 df4:	39 40 00 00 	li      r10,0
 df8:	93 c1 00 08 	stw     r30,8(r1)
 dfc:	93 e1 00 0c 	stw     r31,12(r1)
 e00:	7d 49 53 78 	mr      r9,r10
 e04:	80 a3 00 00 	lwz     r5,0(r3)
 e08:	38 e3 00 04 	addi    r7,r3,4
 e0c:	7d 46 53 78 	mr      r6,r10
 e10:	a0 e7 00 00 	lhz     r7,0(r7)
 e14:	7d 29 33 78 	or      r9,r9,r6
 e18:	39 03 00 06 	addi    r8,r3,6
 e1c:	7d 46 53 78 	mr      r6,r10
 e20:	89 08 00 00 	lbz     r8,0(r8)
 e24:	7d 29 33 78 	or      r9,r9,r6
 e28:	38 63 00 08 	addi    r3,r3,8
 e2c:	7d 46 53 78 	mr      r6,r10
 e30:	83 c3 00 00 	lwz     r30,0(r3)
 e34:	83 e3 00 04 	lwz     r31,4(r3)
 e38:	7d 29 33 78 	or      r9,r9,r6
 e3c:	7d 43 53 78 	mr      r3,r10
 e40:	90 a4 00 00 	stw     r5,0(r4)
 e44:	7d 29 1b 78 	or      r9,r9,r3
 e48:	38 c4 00 04 	addi    r6,r4,4
 e4c:	7d 43 53 78 	mr      r3,r10
 e50:	b0 e6 00 00 	sth     r7,0(r6)
 e54:	7d 29 1b 78 	or      r9,r9,r3
 e58:	38 e4 00 06 	addi    r7,r4,6
 e5c:	7d 43 53 78 	mr      r3,r10
 e60:	99 07 00 00 	stb     r8,0(r7)
 e64:	7d 23 1b 78 	or      r3,r9,r3
 e68:	38 84 00 08 	addi    r4,r4,8
 e6c:	93 c4 00 00 	stw     r30,0(r4)
 e70:	93 e4 00 04 	stw     r31,4(r4)
 e74:	7c 63 53 78 	or      r3,r3,r10
 e78:	83 c1 00 08 	lwz     r30,8(r1)
 e7c:	83 e1 00 0c 	lwz     r31,12(r1)
 e80:	38 21 00 10 	addi    r1,r1,16
 e84:	4e 80 00 20 	blr

After the patch:

00000dbc <set_test_user>:
 dbc:	39 40 00 00 	li      r10,0
 dc0:	7d 49 53 78 	mr      r9,r10
 dc4:	80 03 00 00 	lwz     r0,0(r3)
 dc8:	7d 48 53 78 	mr      r8,r10
 dcc:	a1 63 00 04 	lhz     r11,4(r3)
 dd0:	7d 29 43 78 	or      r9,r9,r8
 dd4:	7d 48 53 78 	mr      r8,r10
 dd8:	88 a3 00 06 	lbz     r5,6(r3)
 ddc:	7d 29 43 78 	or      r9,r9,r8
 de0:	7d 48 53 78 	mr      r8,r10
 de4:	80 c3 00 08 	lwz     r6,8(r3)
 de8:	80 e3 00 0c 	lwz     r7,12(r3)
 dec:	7d 29 43 78 	or      r9,r9,r8
 df0:	7d 43 53 78 	mr      r3,r10
 df4:	90 04 00 00 	stw     r0,0(r4)
 df8:	7d 29 1b 78 	or      r9,r9,r3
 dfc:	7d 43 53 78 	mr      r3,r10
 e00:	b1 64 00 04 	sth     r11,4(r4)
 e04:	7d 29 1b 78 	or      r9,r9,r3
 e08:	7d 43 53 78 	mr      r3,r10
 e0c:	98 a4 00 06 	stb     r5,6(r4)
 e10:	7d 23 1b 78 	or      r3,r9,r3
 e14:	90 c4 00 08 	stw     r6,8(r4)
 e18:	90 e4 00 0c 	stw     r7,12(r4)
 e1c:	7c 63 53 78 	or      r3,r3,r10
 e20:	4e 80 00 20 	blr

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
 arch/powerpc/include/asm/uaccess.h | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

Comments

Segher Boessenkool April 15, 2020, 10:06 p.m. UTC | #1
Hi!

On Wed, Apr 15, 2020 at 09:20:26AM +0000, Christophe Leroy wrote:
> At the time being, __put_user()/__get_user() and friends only use
> register indirect with immediate index addressing, with the index
> set to 0. Ex:
> 
> 	lwz	reg1, 0(reg2)

This is called a "D-form" instruction, or sometimes "offset addressing".
Don't talk about an "index", it confuses things, because the *other*
kind is called "indexed" already, also in the ISA docs!  (X-form, aka
indexed addressing, [reg+reg], where D-form does [reg+imm], and both
forms can do [reg]).

> Give the compiler the opportunity to use other adressing modes
> whenever possible, to get more optimised code.

Great :-)

> --- a/arch/powerpc/include/asm/uaccess.h
> +++ b/arch/powerpc/include/asm/uaccess.h
> @@ -114,7 +114,7 @@ extern long __put_user_bad(void);
>   */
>  #define __put_user_asm(x, addr, err, op)			\
>  	__asm__ __volatile__(					\
> -		"1:	" op " %1,0(%2)	# put_user\n"		\
> +		"1:	" op "%U2%X2 %1,%2	# put_user\n"	\
>  		"2:\n"						\
>  		".section .fixup,\"ax\"\n"			\
>  		"3:	li %0,%3\n"				\
> @@ -122,7 +122,7 @@ extern long __put_user_bad(void);
>  		".previous\n"					\
>  		EX_TABLE(1b, 3b)				\
>  		: "=r" (err)					\
> -		: "r" (x), "b" (addr), "i" (-EFAULT), "0" (err))
> +		: "r" (x), "m" (*addr), "i" (-EFAULT), "0" (err))

%Un on an "m" operand doesn't do much: you need to make it "m<>" if you
want pre-modify ("update") insns to be generated.  (You then will want
to make sure that operand is used in a way GCC can understand; since it
is used only once here, that works fine).

> @@ -130,8 +130,8 @@ extern long __put_user_bad(void);
>  #else /* __powerpc64__ */
>  #define __put_user_asm2(x, addr, err)				\
>  	__asm__ __volatile__(					\
> -		"1:	stw %1,0(%2)\n"				\
> -		"2:	stw %1+1,4(%2)\n"			\
> +		"1:	stw%U2%X2 %1,%2\n"			\
> +		"2:	stw%U2%X2 %L1,%L2\n"			\
>  		"3:\n"						\
>  		".section .fixup,\"ax\"\n"			\
>  		"4:	li %0,%3\n"				\
> @@ -140,7 +140,7 @@ extern long __put_user_bad(void);
>  		EX_TABLE(1b, 4b)				\
>  		EX_TABLE(2b, 4b)				\
>  		: "=r" (err)					\
> -		: "r" (x), "b" (addr), "i" (-EFAULT), "0" (err))
> +		: "r" (x), "m" (*addr), "i" (-EFAULT), "0" (err))

Here, it doesn't work.  You don't want two consecutive update insns in
any case.  Easiest is to just not use "m<>", and then, don't use %Un
(which won't do anything, but it is confusing).

Same for the reads.

Rest looks fine, and update should be good with that fixed as said.

Reviewed-by: Segher Boessenkool <segher@kernel.crashing.org>


Segher
Christophe Leroy April 16, 2020, 5:50 a.m. UTC | #2
Hi,

Le 16/04/2020 à 00:06, Segher Boessenkool a écrit :
> Hi!
> 
> On Wed, Apr 15, 2020 at 09:20:26AM +0000, Christophe Leroy wrote:
>> At the time being, __put_user()/__get_user() and friends only use
>> register indirect with immediate index addressing, with the index
>> set to 0. Ex:
>>
>> 	lwz	reg1, 0(reg2)
> 
> This is called a "D-form" instruction, or sometimes "offset addressing".
> Don't talk about an "index", it confuses things, because the *other*
> kind is called "indexed" already, also in the ISA docs!  (X-form, aka
> indexed addressing, [reg+reg], where D-form does [reg+imm], and both
> forms can do [reg]).

In the "Programming Environments Manual for 32-Bit Implementations of 
the PowerPC™ Architecture", they list the following addressing modes:

Load and store operations have three categories of effective address 
generation that depend on the
operands specified:
• Register indirect with immediate index mode
• Register indirect with index mode
• Register indirect mode


> 
>> Give the compiler the opportunity to use other adressing modes
>> whenever possible, to get more optimised code.
> 
> Great :-)
> 
>> --- a/arch/powerpc/include/asm/uaccess.h
>> +++ b/arch/powerpc/include/asm/uaccess.h
>> @@ -114,7 +114,7 @@ extern long __put_user_bad(void);
>>    */
>>   #define __put_user_asm(x, addr, err, op)			\
>>   	__asm__ __volatile__(					\
>> -		"1:	" op " %1,0(%2)	# put_user\n"		\
>> +		"1:	" op "%U2%X2 %1,%2	# put_user\n"	\
>>   		"2:\n"						\
>>   		".section .fixup,\"ax\"\n"			\
>>   		"3:	li %0,%3\n"				\
>> @@ -122,7 +122,7 @@ extern long __put_user_bad(void);
>>   		".previous\n"					\
>>   		EX_TABLE(1b, 3b)				\
>>   		: "=r" (err)					\
>> -		: "r" (x), "b" (addr), "i" (-EFAULT), "0" (err))
>> +		: "r" (x), "m" (*addr), "i" (-EFAULT), "0" (err))
> 
> %Un on an "m" operand doesn't do much: you need to make it "m<>" if you
> want pre-modify ("update") insns to be generated.  (You then will want
> to make sure that operand is used in a way GCC can understand; since it
> is used only once here, that works fine).

Ah ? Indeed I got the idea from include/asm/io.h where there is:

#define DEF_MMIO_IN_D(name, size, insn)				\
static inline u##size name(const volatile u##size __iomem *addr)	\
{									\
	u##size ret;							\
	__asm__ __volatile__("sync;"#insn"%U1%X1 %0,%1;twi 0,%0,0;isync"\
		: "=r" (ret) : "m" (*addr) : "memory");			\
	return ret;							\
}

It should be "m<>" there as well ?

> 
>> @@ -130,8 +130,8 @@ extern long __put_user_bad(void);
>>   #else /* __powerpc64__ */
>>   #define __put_user_asm2(x, addr, err)				\
>>   	__asm__ __volatile__(					\
>> -		"1:	stw %1,0(%2)\n"				\
>> -		"2:	stw %1+1,4(%2)\n"			\
>> +		"1:	stw%U2%X2 %1,%2\n"			\
>> +		"2:	stw%U2%X2 %L1,%L2\n"			\
>>   		"3:\n"						\
>>   		".section .fixup,\"ax\"\n"			\
>>   		"4:	li %0,%3\n"				\
>> @@ -140,7 +140,7 @@ extern long __put_user_bad(void);
>>   		EX_TABLE(1b, 4b)				\
>>   		EX_TABLE(2b, 4b)				\
>>   		: "=r" (err)					\
>> -		: "r" (x), "b" (addr), "i" (-EFAULT), "0" (err))
>> +		: "r" (x), "m" (*addr), "i" (-EFAULT), "0" (err))
> 
> Here, it doesn't work.  You don't want two consecutive update insns in
> any case.  Easiest is to just not use "m<>", and then, don't use %Un
> (which won't do anything, but it is confusing).

Can't we leave the Un on the second stw ?

> 
> Same for the reads.
> 
> Rest looks fine, and update should be good with that fixed as said.
> 
> Reviewed-by: Segher Boessenkool <segher@kernel.crashing.org>
> 
> 
> Segher
> 

Thanks for the review
Christophe
Segher Boessenkool April 17, 2020, 1:39 a.m. UTC | #3
Hi!

On Thu, Apr 16, 2020 at 07:50:00AM +0200, Christophe Leroy wrote:
> Le 16/04/2020 à 00:06, Segher Boessenkool a écrit :
> >On Wed, Apr 15, 2020 at 09:20:26AM +0000, Christophe Leroy wrote:
> >>At the time being, __put_user()/__get_user() and friends only use
> >>register indirect with immediate index addressing, with the index
> >>set to 0. Ex:
> >>
> >>	lwz	reg1, 0(reg2)
> >
> >This is called a "D-form" instruction, or sometimes "offset addressing".
> >Don't talk about an "index", it confuses things, because the *other*
> >kind is called "indexed" already, also in the ISA docs!  (X-form, aka
> >indexed addressing, [reg+reg], where D-form does [reg+imm], and both
> >forms can do [reg]).
> 
> In the "Programming Environments Manual for 32-Bit Implementations of 
> the PowerPC™ Architecture", they list the following addressing modes:
> 
> Load and store operations have three categories of effective address 
> generation that depend on the
> operands specified:
> • Register indirect with immediate index mode
> • Register indirect with index mode
> • Register indirect mode

Huh.  I must have pushed all that confusing terminology to the back of
my head :-)

> >%Un on an "m" operand doesn't do much: you need to make it "m<>" if you
> >want pre-modify ("update") insns to be generated.  (You then will want
> >to make sure that operand is used in a way GCC can understand; since it
> >is used only once here, that works fine).
> 
> Ah ? Indeed I got the idea from include/asm/io.h where there is:
> 
> #define DEF_MMIO_IN_D(name, size, insn)				\
> static inline u##size name(const volatile u##size __iomem *addr)	\
> {									\
> 	u##size ret;							\
> 	__asm__ __volatile__("sync;"#insn"%U1%X1 %0,%1;twi 0,%0,0;isync"\
> 		: "=r" (ret) : "m" (*addr) : "memory");			\
> 	return ret;							\
> }
> 
> It should be "m<>" there as well ?

Yes, that will work here.

Long ago, "m" in inline assembler code did the same as "m<>" now does
(and "m" internal in GCC still does).  To get a memory without pre-modify
addressing, you used "es".

Since people kept getting that wrong (it *is* surprising), it was changed
to the current scheme.  But the kernel uses weren't updated (and no one
seems to have missed it).

> >>  #else /* __powerpc64__ */
> >>  #define __put_user_asm2(x, addr, err)				\
> >>  	__asm__ __volatile__(					\
> >>-		"1:	stw %1,0(%2)\n"				\
> >>-		"2:	stw %1+1,4(%2)\n"			\
> >>+		"1:	stw%U2%X2 %1,%2\n"			\
> >>+		"2:	stw%U2%X2 %L1,%L2\n"			\
> >>  		"3:\n"						\
> >>  		".section .fixup,\"ax\"\n"			\
> >>  		"4:	li %0,%3\n"				\
> >>@@ -140,7 +140,7 @@ extern long __put_user_bad(void);
> >>  		EX_TABLE(1b, 4b)				\
> >>  		EX_TABLE(2b, 4b)				\
> >>  		: "=r" (err)					\
> >>-		: "r" (x), "b" (addr), "i" (-EFAULT), "0" (err))
> >>+		: "r" (x), "m" (*addr), "i" (-EFAULT), "0" (err))
> >
> >Here, it doesn't work.  You don't want two consecutive update insns in
> >any case.  Easiest is to just not use "m<>", and then, don't use %Un
> >(which won't do anything, but it is confusing).
> 
> Can't we leave the Un on the second stw ?

That cannot work.  You can use it on only the *first* though :-)

And this doesn't work on LE I think?  (But the asm doesn't anyway?)

Or, you can decide this is all way too tricky, and not worth it.


Segher
diff mbox series

Patch

diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index 2f500debae21..dee71e9c7618 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -114,7 +114,7 @@  extern long __put_user_bad(void);
  */
 #define __put_user_asm(x, addr, err, op)			\
 	__asm__ __volatile__(					\
-		"1:	" op " %1,0(%2)	# put_user\n"		\
+		"1:	" op "%U2%X2 %1,%2	# put_user\n"	\
 		"2:\n"						\
 		".section .fixup,\"ax\"\n"			\
 		"3:	li %0,%3\n"				\
@@ -122,7 +122,7 @@  extern long __put_user_bad(void);
 		".previous\n"					\
 		EX_TABLE(1b, 3b)				\
 		: "=r" (err)					\
-		: "r" (x), "b" (addr), "i" (-EFAULT), "0" (err))
+		: "r" (x), "m" (*addr), "i" (-EFAULT), "0" (err))
 
 #ifdef __powerpc64__
 #define __put_user_asm2(x, ptr, retval)				\
@@ -130,8 +130,8 @@  extern long __put_user_bad(void);
 #else /* __powerpc64__ */
 #define __put_user_asm2(x, addr, err)				\
 	__asm__ __volatile__(					\
-		"1:	stw %1,0(%2)\n"				\
-		"2:	stw %1+1,4(%2)\n"			\
+		"1:	stw%U2%X2 %1,%2\n"			\
+		"2:	stw%U2%X2 %L1,%L2\n"			\
 		"3:\n"						\
 		".section .fixup,\"ax\"\n"			\
 		"4:	li %0,%3\n"				\
@@ -140,7 +140,7 @@  extern long __put_user_bad(void);
 		EX_TABLE(1b, 4b)				\
 		EX_TABLE(2b, 4b)				\
 		: "=r" (err)					\
-		: "r" (x), "b" (addr), "i" (-EFAULT), "0" (err))
+		: "r" (x), "m" (*addr), "i" (-EFAULT), "0" (err))
 #endif /* __powerpc64__ */
 
 #define __put_user_size_allowed(x, ptr, size, retval)		\
@@ -217,7 +217,7 @@  extern long __get_user_bad(void);
 
 #define __get_user_asm(x, addr, err, op)		\
 	__asm__ __volatile__(				\
-		"1:	"op" %1,0(%2)	# get_user\n"	\
+		"1:	"op"%U2%X2 %1, %2	# get_user\n"	\
 		"2:\n"					\
 		".section .fixup,\"ax\"\n"		\
 		"3:	li %0,%3\n"			\
@@ -226,7 +226,7 @@  extern long __get_user_bad(void);
 		".previous\n"				\
 		EX_TABLE(1b, 3b)			\
 		: "=r" (err), "=r" (x)			\
-		: "b" (addr), "i" (-EFAULT), "0" (err))
+		: "m" (*addr), "i" (-EFAULT), "0" (err))
 
 #ifdef __powerpc64__
 #define __get_user_asm2(x, addr, err)			\
@@ -234,8 +234,8 @@  extern long __get_user_bad(void);
 #else /* __powerpc64__ */
 #define __get_user_asm2(x, addr, err)			\
 	__asm__ __volatile__(				\
-		"1:	lwz %1,0(%2)\n"			\
-		"2:	lwz %1+1,4(%2)\n"		\
+		"1:	lwz%U2%X2 %1, %2\n"			\
+		"2:	lwz%U2%X2 %L1, %L2\n"		\
 		"3:\n"					\
 		".section .fixup,\"ax\"\n"		\
 		"4:	li %0,%3\n"			\
@@ -246,7 +246,7 @@  extern long __get_user_bad(void);
 		EX_TABLE(1b, 4b)			\
 		EX_TABLE(2b, 4b)			\
 		: "=r" (err), "=&r" (x)			\
-		: "b" (addr), "i" (-EFAULT), "0" (err))
+		: "m" (*addr), "i" (-EFAULT), "0" (err))
 #endif /* __powerpc64__ */
 
 #define __get_user_size_allowed(x, ptr, size, retval)		\
@@ -256,10 +256,10 @@  do {								\
 	if (size > sizeof(x))					\
 		(x) = __get_user_bad();				\
 	switch (size) {						\
-	case 1: __get_user_asm(x, ptr, retval, "lbz"); break;	\
-	case 2: __get_user_asm(x, ptr, retval, "lhz"); break;	\
-	case 4: __get_user_asm(x, ptr, retval, "lwz"); break;	\
-	case 8: __get_user_asm2(x, ptr, retval);  break;	\
+	case 1: __get_user_asm(x, (u8 __user *)ptr, retval, "lbz"); break;	\
+	case 2: __get_user_asm(x, (u16 __user *)ptr, retval, "lhz"); break;	\
+	case 4: __get_user_asm(x, (u32 __user *)ptr, retval, "lwz"); break;	\
+	case 8: __get_user_asm2(x, (u64 __user *)ptr, retval);  break;	\
 	default: (x) = __get_user_bad();			\
 	}							\
 } while (0)