diff mbox series

[ARM] Correctly set SLOW_BYTE_ACCESS

Message ID HE1PR0801MB21218A82D9B5C5612087DB6F83B10@HE1PR0801MB2121.eurprd08.prod.outlook.com
State New
Headers show
Series [ARM] Correctly set SLOW_BYTE_ACCESS | expand

Commit Message

Wilco Dijkstra Sept. 11, 2019, 3:48 p.m. UTC
Contrary to all documentation, SLOW_BYTE_ACCESS simply means accessing
bitfields by their declared type, which results in better codegeneration
on practically any target.  So set it correctly to 1 on Arm.

As a result we generate much better code for bitfields:

typedef struct
{
  int x : 2, y : 8, z : 2;
} X;

int bitfield (X *p)
{
  return p->x + p->y + p->z;
}


Before:
	ldrb	r3, [r0]	@ zero_extendqisi2
	ldrh	r2, [r0]
	ldrb	r0, [r0, #1]	@ zero_extendqisi2
	sbfx	r3, r3, #0, #2
	sbfx	r2, r2, #2, #8
	sbfx	r0, r0, #2, #2
	sxtab	r3, r2, r3
	sxtab	r0, r3, r0
	bx	lr

After:
	ldr	r0, [r0]
	sbfx	r3, r0, #0, #2
	sbfx	r2, r0, #2, #8
	sbfx	r0, r0, #10, #2
	sxtab	r3, r2, r3
	add	r0, r0, r3
	bx	lr

Bootstrap OK, OK for commit?

ChangeLog:
2019-09-11  Wilco Dijkstra  <wdijkstr@arm.com>

        * config/arm/arm.h (SLOW_BYTE_ACCESS): Set to 1.

--

Comments

Paul Koning Sept. 11, 2019, 5:05 p.m. UTC | #1
> On Sep 11, 2019, at 11:48 AM, Wilco Dijkstra <Wilco.Dijkstra@arm.com> wrote:
> 
> Contrary to all documentation, SLOW_BYTE_ACCESS simply means accessing
> bitfields by their declared type, which results in better codegeneration
> on practically any target.  So set it correctly to 1 on Arm.

If the documentation is indeed wrong, could you submit a patch to cure that documentation defect?

	paul
Wilco Dijkstra Sept. 11, 2019, 5:20 p.m. UTC | #2
Hi Paul,

> > On Sep 11, 2019, at 11:48 AM, Wilco Dijkstra <Wilco.Dijkstra@arm.com> wrote:
> > 
> > Contrary to all documentation, SLOW_BYTE_ACCESS simply means accessing
> > bitfields by their declared type, which results in better codegeneration
> > on practically any target.  So set it correctly to 1 on Arm.
> 
> If the documentation is indeed wrong, could you submit a patch to cure that documentation defect?
 
I already submitted a patch to completely remove SLOW_BYTE_ACCESS since there
are only 2 uses of it in all of GCC. There is already a target call to handle volatile
bitfields, the other use is completely unrelated (since it's not even about memory access)
and dead code as far as I can see.

Wilco
Wilco Dijkstra Oct. 10, 2019, 5:19 p.m. UTC | #3
ping

Contrary to all documentation, SLOW_BYTE_ACCESS simply means accessing
bitfields by their declared type, which results in better codegeneration
on practically any target.  So set it correctly to 1 on Arm.

As a result we generate much better code for bitfields:

typedef struct
{
  int x : 2, y : 8, z : 2;
} X;

int bitfield (X *p)
{
  return p->x + p->y + p->z;
}


Before:
        ldrb    r3, [r0]        @ zero_extendqisi2
        ldrh    r2, [r0]
        ldrb    r0, [r0, #1]    @ zero_extendqisi2
        sbfx    r3, r3, #0, #2
        sbfx    r2, r2, #2, #8
        sbfx    r0, r0, #2, #2
        sxtab   r3, r2, r3
        sxtab   r0, r3, r0
        bx      lr

After:
        ldr     r0, [r0]
        sbfx    r3, r0, #0, #2
        sbfx    r2, r0, #2, #8
        sbfx    r0, r0, #10, #2
        sxtab   r3, r2, r3
        add     r0, r0, r3
        bx      lr

Bootstrap OK, OK for commit?

ChangeLog:
2019-09-11  Wilco Dijkstra  <wdijkstr@arm.com>

        * config/arm/arm.h (SLOW_BYTE_ACCESS): Set to 1.

--

diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
index 8b92c830de09a3ad49420fdfacde02d8efc2a89b..11212d988a0f56299c2266bace80170d074be56c 100644
--- a/gcc/config/arm/arm.h
+++ b/gcc/config/arm/arm.h
@@ -1892,8 +1892,9 @@ enum arm_auto_incmodes
    ((arm_arch4 || (MODE) == QImode) ? ZERO_EXTEND                      \
     : ((BYTES_BIG_ENDIAN && (MODE) == HImode) ? SIGN_EXTEND : UNKNOWN)))
 
-/* Nonzero if access to memory by bytes is slow and undesirable.  */
-#define SLOW_BYTE_ACCESS 0
+/* Contrary to all documentation, this enables wide bitfield accesses,
+   which results in better code when accessing multiple bitfields.  */
+#define SLOW_BYTE_ACCESS 1
 
 /* Immediate shift counts are truncated by the output routines (or was it
    the assembler?).  Shift counts in a register are truncated by ARM.  Note
Wilco Dijkstra Jan. 21, 2020, 2:06 p.m. UTC | #4
ping (updated comment to use the same wording as the AArch64 version on trunk)

Contrary to all documentation, SLOW_BYTE_ACCESS simply means accessing
bitfields by their declared type, which results in better codegeneration
on practically any target.  So set it correctly to 1 on Arm.

As a result we generate much better code for bitfields:

typedef struct
{
  int x : 2, y : 8, z : 2;
} X;

int bitfield (X *p)
{
  return p->x + p->y + p->z;
}


Before:
	ldrb	r3, [r0]	@ zero_extendqisi2
	ldrh	r2, [r0]
	ldrb	r0, [r0, #1]	@ zero_extendqisi2
	sbfx	r3, r3, #0, #2
	sbfx	r2, r2, #2, #8
	sbfx	r0, r0, #2, #2
	sxtab	r3, r2, r3
	sxtab	r0, r3, r0
	bx	lr

After:
	ldr	r0, [r0]
	sbfx	r3, r0, #0, #2
	sbfx	r2, r0, #2, #8
	sbfx	r0, r0, #10, #2
	sxtab	r3, r2, r3
	add	r0, r0, r3
	bx	lr

Bootstrap OK, OK for commit?

ChangeLog:
2019-09-11  Wilco Dijkstra  <wdijkstr@arm.com>

        * config/arm/arm.h (SLOW_BYTE_ACCESS): Set to 1.

--
diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
index e07cf03538c5bb23e3285859b9e44a627b6e9ced..998139ce759d5829b7f868367d4263df9d0e12d9 100644
--- a/gcc/config/arm/arm.h
+++ b/gcc/config/arm/arm.h
@@ -1956,8 +1956,8 @@ enum arm_auto_incmodes
    ((arm_arch4 || (MODE) == QImode) ? ZERO_EXTEND			\
     : ((BYTES_BIG_ENDIAN && (MODE) == HImode) ? SIGN_EXTEND : UNKNOWN)))
 
-/* Nonzero if access to memory by bytes is slow and undesirable.  */
-#define SLOW_BYTE_ACCESS 0
+/* Enable wide bitfield accesses for more efficient bitfield code.  */
+#define SLOW_BYTE_ACCESS 1
 
 /* Immediate shift counts are truncated by the output routines (or was it
    the assembler?).  Shift counts in a register are truncated by ARM.  Note
Wilco Dijkstra Feb. 4, 2020, 10:50 a.m. UTC | #5
ping (updated comment to use the same wording as the AArch64 version on trunk)

Contrary to all documentation, SLOW_BYTE_ACCESS simply means accessing
bitfields by their declared type, which results in better codegeneration
on practically any target.  So set it correctly to 1 on Arm.

As a result we generate much better code for bitfields:

typedef struct
{
  int x : 2, y : 8, z : 2;
} X;

int bitfield (X *p)
{
  return p->x + p->y + p->z;
}


Before:
        ldrb    r3, [r0]        @ zero_extendqisi2
        ldrh    r2, [r0]
        ldrb    r0, [r0, #1]    @ zero_extendqisi2
        sbfx    r3, r3, #0, #2
        sbfx    r2, r2, #2, #8
        sbfx    r0, r0, #2, #2
        sxtab   r3, r2, r3
        sxtab   r0, r3, r0
        bx      lr

After:
        ldr     r0, [r0]
        sbfx    r3, r0, #0, #2
        sbfx    r2, r0, #2, #8
        sbfx    r0, r0, #10, #2
        sxtab   r3, r2, r3
        add     r0, r0, r3
        bx      lr

Bootstrap OK, OK for commit?

ChangeLog:
2019-09-11  Wilco Dijkstra  <wdijkstr@arm.com>

        * config/arm/arm.h (SLOW_BYTE_ACCESS): Set to 1.

--
diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
index e07cf03538c5bb23e3285859b9e44a627b6e9ced..998139ce759d5829b7f868367d4263df9d0e12d9 100644
--- a/gcc/config/arm/arm.h
+++ b/gcc/config/arm/arm.h
@@ -1956,8 +1956,8 @@ enum arm_auto_incmodes
    ((arm_arch4 || (MODE) == QImode) ? ZERO_EXTEND                      \
     : ((BYTES_BIG_ENDIAN && (MODE) == HImode) ? SIGN_EXTEND : UNKNOWN)))
 
-/* Nonzero if access to memory by bytes is slow and undesirable.  */
-#define SLOW_BYTE_ACCESS 0
+/* Enable wide bitfield accesses for more efficient bitfield code.  */
+#define SLOW_BYTE_ACCESS 1
 
 /* Immediate shift counts are truncated by the output routines (or was it
    the assembler?).  Shift counts in a register are truncated by ARM.  Note
diff mbox series

Patch

diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
index 8b92c830de09a3ad49420fdfacde02d8efc2a89b..11212d988a0f56299c2266bace80170d074be56c 100644
--- a/gcc/config/arm/arm.h
+++ b/gcc/config/arm/arm.h
@@ -1892,8 +1892,9 @@  enum arm_auto_incmodes
    ((arm_arch4 || (MODE) == QImode) ? ZERO_EXTEND			\
     : ((BYTES_BIG_ENDIAN && (MODE) == HImode) ? SIGN_EXTEND : UNKNOWN)))
 
-/* Nonzero if access to memory by bytes is slow and undesirable.  */
-#define SLOW_BYTE_ACCESS 0
+/* Contrary to all documentation, this enables wide bitfield accesses,
+   which results in better code when accessing multiple bitfields.  */
+#define SLOW_BYTE_ACCESS 1
 
 /* Immediate shift counts are truncated by the output routines (or was it
    the assembler?).  Shift counts in a register are truncated by ARM.  Note