diff mbox

powerpc: sysdev: cpm1: Optimise gpio bit calculation

Message ID 20170309094206.A832167992@localhost.localdomain (mailing list archive)
State Rejected
Headers show

Commit Message

Christophe Leroy March 9, 2017, 9:42 a.m. UTC
Help a bit the compiler to provide better code:

unsigned int f(int i)
{
	return 1 << (31 - i);
}

unsigned int g(int i)
{
	return 0x80000000 >> i;
}

Disassembly of section .text:

00000000 <f>:
   0:	20 63 00 1f 	subfic  r3,r3,31
   4:	39 20 00 01 	li      r9,1
   8:	7d 23 18 30 	slw     r3,r9,r3
   c:	4e 80 00 20 	blr

00000010 <g>:
  10:	3d 20 80 00 	lis     r9,-32768
  14:	7d 23 1c 30 	srw     r3,r9,r3
  18:	4e 80 00 20 	blr

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
 arch/powerpc/sysdev/cpm1.c | 24 ++++++++++--------------
 1 file changed, 10 insertions(+), 14 deletions(-)

Comments

Michael Ellerman March 10, 2017, 8:41 a.m. UTC | #1
Christophe Leroy <christophe.leroy@c-s.fr> writes:

> Help a bit the compiler to provide better code:
>
> unsigned int f(int i)
> {
> 	return 1 << (31 - i);
> }
>
> unsigned int g(int i)
> {
> 	return 0x80000000 >> i;
> }
>
> Disassembly of section .text:
>
> 00000000 <f>:
>    0:	20 63 00 1f 	subfic  r3,r3,31
>    4:	39 20 00 01 	li      r9,1
>    8:	7d 23 18 30 	slw     r3,r9,r3
>    c:	4e 80 00 20 	blr
>
> 00000010 <g>:
>   10:	3d 20 80 00 	lis     r9,-32768
>   14:	7d 23 1c 30 	srw     r3,r9,r3
>   18:	4e 80 00 20 	blr

Well yeah, it saves one instruction, but is it worth it? Are these gpio
routines in some hot path I don't know about?

cheers
Christophe Leroy March 10, 2017, 10:54 a.m. UTC | #2
Le 10/03/2017 à 09:41, Michael Ellerman a écrit :
> Christophe Leroy <christophe.leroy@c-s.fr> writes:
>
>> Help a bit the compiler to provide better code:
>>
>> unsigned int f(int i)
>> {
>> 	return 1 << (31 - i);
>> }
>>
>> unsigned int g(int i)
>> {
>> 	return 0x80000000 >> i;
>> }
>>
>> Disassembly of section .text:
>>
>> 00000000 <f>:
>>    0:	20 63 00 1f 	subfic  r3,r3,31
>>    4:	39 20 00 01 	li      r9,1
>>    8:	7d 23 18 30 	slw     r3,r9,r3
>>    c:	4e 80 00 20 	blr
>>
>> 00000010 <g>:
>>   10:	3d 20 80 00 	lis     r9,-32768
>>   14:	7d 23 1c 30 	srw     r3,r9,r3
>>   18:	4e 80 00 20 	blr
>
> Well yeah, it saves one instruction, but is it worth it? Are these gpio
> routines in some hot path I don't know about?
>

It saves one instruction, and one register (see other exemple below 
where r3 is to be preserved)

gpio_get() and gpio_set() are used extensively by some GPIO based 
drivers like SPI, NAND, so it may be worth it as it doesn't impair 
readability (if anyone prefers, we could write  (1 << 31) >> i  instead 
of  0x80000000 >> i )

unsigned int f(int i, unsigned int *a)
{
	*a = 1 << (31 - i);

	return i;
}

unsigned int g(int i, unsigned int *a)
{
	*a = 0x80000000 >> i;

	return i;
}

toto.o:     file format elf32-powerpc


Disassembly of section .text:

00000000 <f>:
    0:	21 43 00 1f 	subfic  r10,r3,31
    4:	39 20 00 01 	li      r9,1
    8:	7d 29 50 30 	slw     r9,r9,r10
    c:	91 24 00 00 	stw     r9,0(r4)
   10:	4e 80 00 20 	blr

00000014 <g>:
   14:	3d 20 80 00 	lis     r9,-32768
   18:	7d 29 1c 30 	srw     r9,r9,r3
   1c:	91 24 00 00 	stw     r9,0(r4)
   20:	4e 80 00 20 	blr
Segher Boessenkool March 10, 2017, 1:02 p.m. UTC | #3
On Fri, Mar 10, 2017 at 07:41:33PM +1100, Michael Ellerman wrote:
> Well yeah, it saves one instruction, but is it worth it? Are these gpio
> routines in some hot path I don't know about?

If there was a GCC PR for this we probably would make GCC optimise
this; there are many similar things that are optimised already, just
not this one.


Segher
Segher Boessenkool March 10, 2017, 1:06 p.m. UTC | #4
On Fri, Mar 10, 2017 at 11:54:19AM +0100, Christophe LEROY wrote:
> gpio_get() and gpio_set() are used extensively by some GPIO based 
> drivers like SPI, NAND, so it may be worth it as it doesn't impair 
> readability (if anyone prefers, we could write  (1 << 31) >> i  instead 
> of  0x80000000 >> i )

1 << 31 is undefined behaviour, of course.


Segher
Christophe Leroy March 10, 2017, 2:04 p.m. UTC | #5
Le 10/03/2017 à 14:06, Segher Boessenkool a écrit :
> On Fri, Mar 10, 2017 at 11:54:19AM +0100, Christophe LEROY wrote:
>> gpio_get() and gpio_set() are used extensively by some GPIO based
>> drivers like SPI, NAND, so it may be worth it as it doesn't impair
>> readability (if anyone prefers, we could write  (1 << 31) >> i  instead
>> of  0x80000000 >> i )
>
> 1 << 31 is undefined behaviour, of course.
>

Shall it be 1U << 31 ?

Christophe
Segher Boessenkool March 10, 2017, 2:32 p.m. UTC | #6
On Fri, Mar 10, 2017 at 03:04:48PM +0100, Christophe LEROY wrote:
> Le 10/03/2017 à 14:06, Segher Boessenkool a écrit :
> >On Fri, Mar 10, 2017 at 11:54:19AM +0100, Christophe LEROY wrote:
> >>gpio_get() and gpio_set() are used extensively by some GPIO based
> >>drivers like SPI, NAND, so it may be worth it as it doesn't impair
> >>readability (if anyone prefers, we could write  (1 << 31) >> i  instead
> >>of  0x80000000 >> i )
> >
> >1 << 31 is undefined behaviour, of course.
> >
> 
> Shall it be 1U << 31 ?

Sure, that works.  "1 << (31 - i)" is most readable (but it doesn't yet
generate the code you want).


Segher
Christophe Leroy March 10, 2017, 2:41 p.m. UTC | #7
Le 10/03/2017 à 15:32, Segher Boessenkool a écrit :
> On Fri, Mar 10, 2017 at 03:04:48PM +0100, Christophe LEROY wrote:
>> Le 10/03/2017 à 14:06, Segher Boessenkool a écrit :
>>> On Fri, Mar 10, 2017 at 11:54:19AM +0100, Christophe LEROY wrote:
>>>> gpio_get() and gpio_set() are used extensively by some GPIO based
>>>> drivers like SPI, NAND, so it may be worth it as it doesn't impair
>>>> readability (if anyone prefers, we could write  (1 << 31) >> i  instead
>>>> of  0x80000000 >> i )
>>>
>>> 1 << 31 is undefined behaviour, of course.
>>>
>>
>> Shall it be 1U << 31 ?
>
> Sure, that works.  "1 << (31 - i)" is most readable (but it doesn't yet
> generate the code you want).
>
>

Euh .... I'm a bit lost. Do you mean the form we have today is the 
driver is wrong ?


@@ -684,9 +682,7 @@ static int cpm1_gpio32_get(struct gpio_chip *gc, 
unsigned int gpio)
  {
  	struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
  	struct cpm_ioport32b __iomem *iop = mm_gc->regs;
-	u32 pin_mask;
-
-	pin_mask = 1 << (31 - gpio);
+	u32 pin_mask = 0x80000000 >> gpio;

  	return !!(in_be32(&iop->dat) & pin_mask);
  }


Which I thought could also become


@@ -684,9 +682,7 @@ static int cpm1_gpio32_get(struct gpio_chip *gc, 
unsigned int gpio)
  {
  	struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
  	struct cpm_ioport32b __iomem *iop = mm_gc->regs;
-	u32 pin_mask;
-
-	pin_mask = 1 << (31 - gpio);
+	u32 pin_mask = (1 << 31) >> gpio;

  	return !!(in_be32(&iop->dat) & pin_mask);
  }


Christophe
Segher Boessenkool March 10, 2017, 3:41 p.m. UTC | #8
On Fri, Mar 10, 2017 at 03:41:23PM +0100, Christophe LEROY wrote:
> >>>>gpio_get() and gpio_set() are used extensively by some GPIO based
> >>>>drivers like SPI, NAND, so it may be worth it as it doesn't impair
> >>>>readability (if anyone prefers, we could write  (1 << 31) >> i  instead
> >>>>of  0x80000000 >> i )
> >>>
> >>>1 << 31 is undefined behaviour, of course.
> >>
> >>Shall it be 1U << 31 ?
> >
> >Sure, that works.  "1 << (31 - i)" is most readable (but it doesn't yet
> >generate the code you want).
> 
> Euh .... I'm a bit lost. Do you mean the form we have today is the 
> driver is wrong ?

Heh, yes.  But is't okay with GCC, so don't worry about it.

The point is that "0x80000000 >> i" is less readable.


Segher
Christophe Leroy March 21, 2017, 3:04 p.m. UTC | #9
Le 10/03/2017 à 16:41, Segher Boessenkool a écrit :
> On Fri, Mar 10, 2017 at 03:41:23PM +0100, Christophe LEROY wrote:
>>>>>> gpio_get() and gpio_set() are used extensively by some GPIO based
>>>>>> drivers like SPI, NAND, so it may be worth it as it doesn't impair
>>>>>> readability (if anyone prefers, we could write  (1 << 31) >> i  instead
>>>>>> of  0x80000000 >> i )
>>>>> 1 << 31 is undefined behaviour, of course.
>>>> Shall it be 1U << 31 ?
>>> Sure, that works.  "1 << (31 - i)" is most readable (but it doesn't yet
>>> generate the code you want).
>> Euh .... I'm a bit lost. Do you mean the form we have today is the
>> driver is wrong ?
> Heh, yes.  But is't okay with GCC, so don't worry about it.
>
> The point is that "0x80000000 >> i" is less readable.
>
>
FYI, see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80131

Christophe
diff mbox

Patch

diff --git a/arch/powerpc/sysdev/cpm1.c b/arch/powerpc/sysdev/cpm1.c
index dc3653da6dd1..2b0bb55612d2 100644
--- a/arch/powerpc/sysdev/cpm1.c
+++ b/arch/powerpc/sysdev/cpm1.c
@@ -307,7 +307,7 @@  struct cpm_ioport32e {
 static void cpm1_set_pin32(int port, int pin, int flags)
 {
 	struct cpm_ioport32e __iomem *iop;
-	pin = 1 << (31 - pin);
+	pin = 0x80000000 >> pin;
 
 	if (port == CPM_PORTB)
 		iop = (struct cpm_ioport32e __iomem *)
@@ -351,7 +351,7 @@  static void cpm1_set_pin16(int port, int pin, int flags)
 	struct cpm_ioport16 __iomem *iop =
 		(struct cpm_ioport16 __iomem *)&mpc8xx_immr->im_ioport;
 
-	pin = 1 << (15 - pin);
+	pin = 0x8000 >> pin;
 
 	if (port != 0)
 		iop += port - 1;
@@ -550,9 +550,7 @@  static int cpm1_gpio16_get(struct gpio_chip *gc, unsigned int gpio)
 {
 	struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
 	struct cpm_ioport16 __iomem *iop = mm_gc->regs;
-	u16 pin_mask;
-
-	pin_mask = 1 << (15 - gpio);
+	u16 pin_mask = pin_mask = 0x8000 >> gpio;
 
 	return !!(in_be16(&iop->dat) & pin_mask);
 }
@@ -576,7 +574,7 @@  static void cpm1_gpio16_set(struct gpio_chip *gc, unsigned int gpio, int value)
 	struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
 	struct cpm1_gpio16_chip *cpm1_gc = gpiochip_get_data(&mm_gc->gc);
 	unsigned long flags;
-	u16 pin_mask = 1 << (15 - gpio);
+	u16 pin_mask = 0x8000 >> gpio;
 
 	spin_lock_irqsave(&cpm1_gc->lock, flags);
 
@@ -599,7 +597,7 @@  static int cpm1_gpio16_dir_out(struct gpio_chip *gc, unsigned int gpio, int val)
 	struct cpm1_gpio16_chip *cpm1_gc = gpiochip_get_data(&mm_gc->gc);
 	struct cpm_ioport16 __iomem *iop = mm_gc->regs;
 	unsigned long flags;
-	u16 pin_mask = 1 << (15 - gpio);
+	u16 pin_mask = 0x8000 >> gpio;
 
 	spin_lock_irqsave(&cpm1_gc->lock, flags);
 
@@ -617,7 +615,7 @@  static int cpm1_gpio16_dir_in(struct gpio_chip *gc, unsigned int gpio)
 	struct cpm1_gpio16_chip *cpm1_gc = gpiochip_get_data(&mm_gc->gc);
 	struct cpm_ioport16 __iomem *iop = mm_gc->regs;
 	unsigned long flags;
-	u16 pin_mask = 1 << (15 - gpio);
+	u16 pin_mask = 0x8000 >> gpio;
 
 	spin_lock_irqsave(&cpm1_gc->lock, flags);
 
@@ -684,9 +682,7 @@  static int cpm1_gpio32_get(struct gpio_chip *gc, unsigned int gpio)
 {
 	struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
 	struct cpm_ioport32b __iomem *iop = mm_gc->regs;
-	u32 pin_mask;
-
-	pin_mask = 1 << (31 - gpio);
+	u32 pin_mask = 0x80000000 >> gpio;
 
 	return !!(in_be32(&iop->dat) & pin_mask);
 }
@@ -710,7 +706,7 @@  static void cpm1_gpio32_set(struct gpio_chip *gc, unsigned int gpio, int value)
 	struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
 	struct cpm1_gpio32_chip *cpm1_gc = gpiochip_get_data(&mm_gc->gc);
 	unsigned long flags;
-	u32 pin_mask = 1 << (31 - gpio);
+	u32 pin_mask = 0x80000000 >> gpio;
 
 	spin_lock_irqsave(&cpm1_gc->lock, flags);
 
@@ -725,7 +721,7 @@  static int cpm1_gpio32_dir_out(struct gpio_chip *gc, unsigned int gpio, int val)
 	struct cpm1_gpio32_chip *cpm1_gc = gpiochip_get_data(&mm_gc->gc);
 	struct cpm_ioport32b __iomem *iop = mm_gc->regs;
 	unsigned long flags;
-	u32 pin_mask = 1 << (31 - gpio);
+	u32 pin_mask = 0x80000000 >> gpio;
 
 	spin_lock_irqsave(&cpm1_gc->lock, flags);
 
@@ -743,7 +739,7 @@  static int cpm1_gpio32_dir_in(struct gpio_chip *gc, unsigned int gpio)
 	struct cpm1_gpio32_chip *cpm1_gc = gpiochip_get_data(&mm_gc->gc);
 	struct cpm_ioport32b __iomem *iop = mm_gc->regs;
 	unsigned long flags;
-	u32 pin_mask = 1 << (31 - gpio);
+	u32 pin_mask = 0x80000000 >> gpio;
 
 	spin_lock_irqsave(&cpm1_gc->lock, flags);