Message ID | 20201202013412.440301-1-iii@linux.ibm.com |
---|---|
State | New |
Headers | show |
Series | IBM Z: Use llihf and oilf to load large immediates into GPRs | expand |
On 12/2/20 2:34 AM, Ilya Leoshkevich wrote: > Bootstrapped and regtesed on s390x-redhat-linux. There are slight > improvements in all SPEC benchmarks, no regressions that could not be > "fixed" by adding nops. Ok for master? > > > > Currently GCC loads large immediates into GPRs from the literal pool, > which is not as efficient as loading two halves with llihf and oilf. > > gcc/ChangeLog: > > 2020-11-30 Ilya Leoshkevich <iii@linux.ibm.com> > > * config/s390/s390-protos.h (s390_const_int_pool_entry_p): New > function. > * config/s390/s390.c (s390_const_int_pool_entry_p): New > function. > * config/s390/s390.md: Add define_peephole2 that produces llihf > and oilf. > > gcc/testsuite/ChangeLog: > > 2020-11-30 Ilya Leoshkevich <iii@linux.ibm.com> > > * gcc.target/s390/load-imm64-1.c: New test. > * gcc.target/s390/load-imm64-2.c: New test. > --- > gcc/config/s390/s390-protos.h | 1 + > gcc/config/s390/s390.c | 31 ++++++++++++++++++++ > gcc/config/s390/s390.md | 22 ++++++++++++++ > gcc/testsuite/gcc.target/s390/load-imm64-1.c | 10 +++++++ > gcc/testsuite/gcc.target/s390/load-imm64-2.c | 10 +++++++ > 5 files changed, 74 insertions(+) > create mode 100644 gcc/testsuite/gcc.target/s390/load-imm64-1.c > create mode 100644 gcc/testsuite/gcc.target/s390/load-imm64-2.c > > diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h > index ad2f7f77c18..eb10c3f4bbb 100644 > --- a/gcc/config/s390/s390-protos.h > +++ b/gcc/config/s390/s390-protos.h > @@ -135,6 +135,7 @@ extern void s390_split_access_reg (rtx, rtx *, rtx *); > extern void print_operand_address (FILE *, rtx); > extern void print_operand (FILE *, rtx, int); > extern void s390_output_pool_entry (rtx, machine_mode, unsigned int); > +extern bool s390_const_int_pool_entry_p (rtx, HOST_WIDE_INT *); > extern int s390_label_align (rtx_insn *); > extern int s390_agen_dep_p (rtx_insn *, rtx_insn *); > extern rtx_insn *s390_load_got (void); > diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c > index 02f18366aa1..e3d68d3543b 100644 > --- a/gcc/config/s390/s390.c > +++ b/gcc/config/s390/s390.c > @@ -9400,6 +9400,37 @@ s390_output_pool_entry (rtx exp, machine_mode mode, unsigned int align) > } > } > > +/* Return true if MEM refers to an integer constant in the literal pool. If > + VAL is not nullptr, then also fill it with the constant's value. */ > + > +bool > +s390_const_int_pool_entry_p (rtx mem, HOST_WIDE_INT *val) > +{ > + /* Try to match the following: > + - (mem (unspec [(symbol_ref) (reg)] UNSPEC_LTREF)). > + - (mem (symbol_ref)). */ > + > + if (!MEM_P (mem)) > + return false; > + > + rtx addr = XEXP (mem, 0); > + rtx sym; > + if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LTREF) > + sym = XVECEXP (addr, 0, 0); > + else > + sym = addr; > + > + if (GET_CODE (sym) != SYMBOL_REF || !CONSTANT_POOL_ADDRESS_P (sym)) !SYMBOL_REF_P (sym) > + return false; > + > + rtx val_rtx = get_pool_constant (sym); > + if (!CONST_INT_P (val_rtx)) > + return false; > + > + if (val != nullptr) > + *val = INTVAL (val_rtx); > + return true; > +} Alternatively you probably could have returned the RTX instead and use gen_highpart / gen_lowpart in the peephole. But no need to change that. > > /* Return an RTL expression representing the value of the return address > for the frame COUNT steps up from the current frame. FRAME is the > diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md > index 910415a5974..79e9a75ba2f 100644 > --- a/gcc/config/s390/s390.md > +++ b/gcc/config/s390/s390.md > @@ -2116,6 +2116,28 @@ (define_peephole2 > [(set (match_dup 0) (plus:DI (match_dup 1) (match_dup 2)))] > "") > > +; Split loading of 64-bit constants into GPRs into llihf + oilf - > +; counterintuitively, using oilf is faster than iilf. oilf clobbers > +; cc, so cc must be dead. > +(define_peephole2 > + [(set (match_operand:DI 0 "register_operand" "") > + (match_operand:DI 1 "memory_operand" ""))] > + "TARGET_64BIT > + && TARGET_EXTIMM > + && GENERAL_REG_P (operands[0]) > + && s390_const_int_pool_entry_p (operands[1], nullptr) > + && peep2_reg_dead_p (1, gen_rtx_REG (CCmode, CC_REGNUM))" > + [(set (match_dup 0) (match_dup 2)) > + (parallel > + [(set (match_dup 0) (ior:DI (match_dup 0) (match_dup 3))) > + (clobber (reg:CC CC_REGNUM))])] > +{ > + HOST_WIDE_INT val; > + gcc_assert (s390_const_int_pool_entry_p (operands[1], &val)); This probably breaks with checking disabled. > + operands[2] = gen_rtx_CONST_INT (DImode, val & 0xFFFFFFFF00000000); > + operands[3] = gen_rtx_CONST_INT (DImode, val & 0x00000000FFFFFFFF); ULL for the constants? > +}) > + > ; > ; movsi instruction pattern(s). > ; > diff --git a/gcc/testsuite/gcc.target/s390/load-imm64-1.c b/gcc/testsuite/gcc.target/s390/load-imm64-1.c > new file mode 100644 > index 00000000000..db0a89395aa > --- /dev/null > +++ b/gcc/testsuite/gcc.target/s390/load-imm64-1.c > @@ -0,0 +1,10 @@ > +/* Test that large 64-bit constants are loaded with llihf + oilf when lgrl is > + not available. */ > + > +/* { dg-do compile } */ > +/* { dg-options "-O3 -march=z9-109" } */ > + > +unsigned long magic (void) { return 0x3f08c5392f756cd; } > + > +/* { dg-final { scan-assembler-times {\n\tllihf\t} 1 { target lp64 } } } */ > +/* { dg-final { scan-assembler-times {\n\toilf\t} 1 { target lp64 } } } */ > diff --git a/gcc/testsuite/gcc.target/s390/load-imm64-2.c b/gcc/testsuite/gcc.target/s390/load-imm64-2.c > new file mode 100644 > index 00000000000..43c00cdca3a > --- /dev/null > +++ b/gcc/testsuite/gcc.target/s390/load-imm64-2.c > @@ -0,0 +1,10 @@ > +/* Test that large 64-bit constants are loaded with llihf + oilf when lgrl is > + available. */ > + > +/* { dg-do compile } */ > +/* { dg-options "-O3 -march=z10" } */ > + > +unsigned long magic (void) { return 0x3f08c5392f756cd; } > + > +/* { dg-final { scan-assembler-times {\n\tllihf\t} 1 { target lp64 } } } */ > +/* { dg-final { scan-assembler-times {\n\toilf\t} 1 { target lp64 } } } */ >
On Wed, 2020-12-02 at 08:15 +0100, Andreas Krebbel wrote: > On 12/2/20 2:34 AM, Ilya Leoshkevich wrote: > > Bootstrapped and regtesed on s390x-redhat-linux. There are slight > > improvements in all SPEC benchmarks, no regressions that could not > > be > > "fixed" by adding nops. Ok for master? > > > > > > > > Currently GCC loads large immediates into GPRs from the literal > > pool, > > which is not as efficient as loading two halves with llihf and > > oilf. > > > > gcc/ChangeLog: > > > > 2020-11-30 Ilya Leoshkevich <iii@linux.ibm.com> > > > > * config/s390/s390-protos.h (s390_const_int_pool_entry_p): New > > function. > > * config/s390/s390.c (s390_const_int_pool_entry_p): New > > function. > > * config/s390/s390.md: Add define_peephole2 that produces llihf > > and oilf. > > > > gcc/testsuite/ChangeLog: > > > > 2020-11-30 Ilya Leoshkevich <iii@linux.ibm.com> > > > > * gcc.target/s390/load-imm64-1.c: New test. > > * gcc.target/s390/load-imm64-2.c: New test. > > --- > > gcc/config/s390/s390-protos.h | 1 + > > gcc/config/s390/s390.c | 31 > > ++++++++++++++++++++ > > gcc/config/s390/s390.md | 22 ++++++++++++++ > > gcc/testsuite/gcc.target/s390/load-imm64-1.c | 10 +++++++ > > gcc/testsuite/gcc.target/s390/load-imm64-2.c | 10 +++++++ > > 5 files changed, 74 insertions(+) > > create mode 100644 gcc/testsuite/gcc.target/s390/load-imm64-1.c > > create mode 100644 gcc/testsuite/gcc.target/s390/load-imm64-2.c > > > > diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390- > > protos.h > > index ad2f7f77c18..eb10c3f4bbb 100644 > > --- a/gcc/config/s390/s390-protos.h > > +++ b/gcc/config/s390/s390-protos.h > > @@ -135,6 +135,7 @@ extern void s390_split_access_reg (rtx, rtx *, > > rtx *); > > extern void print_operand_address (FILE *, rtx); > > extern void print_operand (FILE *, rtx, int); > > extern void s390_output_pool_entry (rtx, machine_mode, unsigned > > int); > > +extern bool s390_const_int_pool_entry_p (rtx, HOST_WIDE_INT *); > > extern int s390_label_align (rtx_insn *); > > extern int s390_agen_dep_p (rtx_insn *, rtx_insn *); > > extern rtx_insn *s390_load_got (void); > > diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c > > index 02f18366aa1..e3d68d3543b 100644 > > --- a/gcc/config/s390/s390.c > > +++ b/gcc/config/s390/s390.c > > @@ -9400,6 +9400,37 @@ s390_output_pool_entry (rtx exp, > > machine_mode mode, unsigned int align) > > } > > } > > > > +/* Return true if MEM refers to an integer constant in the literal > > pool. If > > + VAL is not nullptr, then also fill it with the constant's > > value. */ > > + > > +bool > > +s390_const_int_pool_entry_p (rtx mem, HOST_WIDE_INT *val) > > +{ > > + /* Try to match the following: > > + - (mem (unspec [(symbol_ref) (reg)] UNSPEC_LTREF)). > > + - (mem (symbol_ref)). */ > > + > > + if (!MEM_P (mem)) > > + return false; > > + > > + rtx addr = XEXP (mem, 0); > > + rtx sym; > > + if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LTREF) > > + sym = XVECEXP (addr, 0, 0); > > + else > > + sym = addr; > > + > > + if (GET_CODE (sym) != SYMBOL_REF || !CONSTANT_POOL_ADDRESS_P > > (sym)) > !SYMBOL_REF_P (sym) Ok. > > > + return false; > > + > > + rtx val_rtx = get_pool_constant (sym); > > + if (!CONST_INT_P (val_rtx)) > > + return false; > > + > > + if (val != nullptr) > > + *val = INTVAL (val_rtx); > > + return true; > > +} > Alternatively you probably could have returned the RTX instead and > use gen_highpart / gen_lowpart in > the peephole. But no need to change that. I'll give it a try and see if the code looks better. > > > > > /* Return an RTL expression representing the value of the return > > address > > for the frame COUNT steps up from the current frame. FRAME is > > the > > diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md > > index 910415a5974..79e9a75ba2f 100644 > > --- a/gcc/config/s390/s390.md > > +++ b/gcc/config/s390/s390.md > > @@ -2116,6 +2116,28 @@ (define_peephole2 > > [(set (match_dup 0) (plus:DI (match_dup 1) (match_dup 2)))] > > "") > > > > +; Split loading of 64-bit constants into GPRs into llihf + oilf - > > +; counterintuitively, using oilf is faster than iilf. oilf > > clobbers > > +; cc, so cc must be dead. > > +(define_peephole2 > > + [(set (match_operand:DI 0 "register_operand" "") > > + (match_operand:DI 1 "memory_operand" ""))] > > + "TARGET_64BIT > > + && TARGET_EXTIMM > > + && GENERAL_REG_P (operands[0]) > > + && s390_const_int_pool_entry_p (operands[1], nullptr) > > + && peep2_reg_dead_p (1, gen_rtx_REG (CCmode, CC_REGNUM))" > > + [(set (match_dup 0) (match_dup 2)) > > + (parallel > > + [(set (match_dup 0) (ior:DI (match_dup 0) (match_dup 3))) > > + (clobber (reg:CC CC_REGNUM))])] > > +{ > > + HOST_WIDE_INT val; > > + gcc_assert (s390_const_int_pool_entry_p (operands[1], &val)); > > This probably breaks with checking disabled. Ouch, you are right. I assumed this can happen only with gcc_checking_assert, but regular asserts can be disabled too. > > > + operands[2] = gen_rtx_CONST_INT (DImode, val & > > 0xFFFFFFFF00000000); > > + operands[3] = gen_rtx_CONST_INT (DImode, val & > > 0x00000000FFFFFFFF); > > ULL for the constants? Ok (or maybe this will go away with gen_highpart / gen_lowpart). > > > +}) > > + > > ; > > ; movsi instruction pattern(s). > > ; > > diff --git a/gcc/testsuite/gcc.target/s390/load-imm64-1.c > > b/gcc/testsuite/gcc.target/s390/load-imm64-1.c > > new file mode 100644 > > index 00000000000..db0a89395aa > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/s390/load-imm64-1.c > > @@ -0,0 +1,10 @@ > > +/* Test that large 64-bit constants are loaded with llihf + oilf > > when lgrl is > > + not available. */ > > + > > +/* { dg-do compile } */ > > +/* { dg-options "-O3 -march=z9-109" } */ > > + > > +unsigned long magic (void) { return 0x3f08c5392f756cd; } > > + > > +/* { dg-final { scan-assembler-times {\n\tllihf\t} 1 { target lp64 > > } } } */ > > +/* { dg-final { scan-assembler-times {\n\toilf\t} 1 { target lp64 > > } } } */ > > diff --git a/gcc/testsuite/gcc.target/s390/load-imm64-2.c > > b/gcc/testsuite/gcc.target/s390/load-imm64-2.c > > new file mode 100644 > > index 00000000000..43c00cdca3a > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/s390/load-imm64-2.c > > @@ -0,0 +1,10 @@ > > +/* Test that large 64-bit constants are loaded with llihf + oilf > > when lgrl is > > + available. */ > > + > > +/* { dg-do compile } */ > > +/* { dg-options "-O3 -march=z10" } */ > > + > > +unsigned long magic (void) { return 0x3f08c5392f756cd; } > > + > > +/* { dg-final { scan-assembler-times {\n\tllihf\t} 1 { target lp64 > > } } } */ > > +/* { dg-final { scan-assembler-times {\n\toilf\t} 1 { target lp64 > > } } } */ > >
diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h index ad2f7f77c18..eb10c3f4bbb 100644 --- a/gcc/config/s390/s390-protos.h +++ b/gcc/config/s390/s390-protos.h @@ -135,6 +135,7 @@ extern void s390_split_access_reg (rtx, rtx *, rtx *); extern void print_operand_address (FILE *, rtx); extern void print_operand (FILE *, rtx, int); extern void s390_output_pool_entry (rtx, machine_mode, unsigned int); +extern bool s390_const_int_pool_entry_p (rtx, HOST_WIDE_INT *); extern int s390_label_align (rtx_insn *); extern int s390_agen_dep_p (rtx_insn *, rtx_insn *); extern rtx_insn *s390_load_got (void); diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c index 02f18366aa1..e3d68d3543b 100644 --- a/gcc/config/s390/s390.c +++ b/gcc/config/s390/s390.c @@ -9400,6 +9400,37 @@ s390_output_pool_entry (rtx exp, machine_mode mode, unsigned int align) } } +/* Return true if MEM refers to an integer constant in the literal pool. If + VAL is not nullptr, then also fill it with the constant's value. */ + +bool +s390_const_int_pool_entry_p (rtx mem, HOST_WIDE_INT *val) +{ + /* Try to match the following: + - (mem (unspec [(symbol_ref) (reg)] UNSPEC_LTREF)). + - (mem (symbol_ref)). */ + + if (!MEM_P (mem)) + return false; + + rtx addr = XEXP (mem, 0); + rtx sym; + if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LTREF) + sym = XVECEXP (addr, 0, 0); + else + sym = addr; + + if (GET_CODE (sym) != SYMBOL_REF || !CONSTANT_POOL_ADDRESS_P (sym)) + return false; + + rtx val_rtx = get_pool_constant (sym); + if (!CONST_INT_P (val_rtx)) + return false; + + if (val != nullptr) + *val = INTVAL (val_rtx); + return true; +} /* Return an RTL expression representing the value of the return address for the frame COUNT steps up from the current frame. FRAME is the diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md index 910415a5974..79e9a75ba2f 100644 --- a/gcc/config/s390/s390.md +++ b/gcc/config/s390/s390.md @@ -2116,6 +2116,28 @@ (define_peephole2 [(set (match_dup 0) (plus:DI (match_dup 1) (match_dup 2)))] "") +; Split loading of 64-bit constants into GPRs into llihf + oilf - +; counterintuitively, using oilf is faster than iilf. oilf clobbers +; cc, so cc must be dead. +(define_peephole2 + [(set (match_operand:DI 0 "register_operand" "") + (match_operand:DI 1 "memory_operand" ""))] + "TARGET_64BIT + && TARGET_EXTIMM + && GENERAL_REG_P (operands[0]) + && s390_const_int_pool_entry_p (operands[1], nullptr) + && peep2_reg_dead_p (1, gen_rtx_REG (CCmode, CC_REGNUM))" + [(set (match_dup 0) (match_dup 2)) + (parallel + [(set (match_dup 0) (ior:DI (match_dup 0) (match_dup 3))) + (clobber (reg:CC CC_REGNUM))])] +{ + HOST_WIDE_INT val; + gcc_assert (s390_const_int_pool_entry_p (operands[1], &val)); + operands[2] = gen_rtx_CONST_INT (DImode, val & 0xFFFFFFFF00000000); + operands[3] = gen_rtx_CONST_INT (DImode, val & 0x00000000FFFFFFFF); +}) + ; ; movsi instruction pattern(s). ; diff --git a/gcc/testsuite/gcc.target/s390/load-imm64-1.c b/gcc/testsuite/gcc.target/s390/load-imm64-1.c new file mode 100644 index 00000000000..db0a89395aa --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/load-imm64-1.c @@ -0,0 +1,10 @@ +/* Test that large 64-bit constants are loaded with llihf + oilf when lgrl is + not available. */ + +/* { dg-do compile } */ +/* { dg-options "-O3 -march=z9-109" } */ + +unsigned long magic (void) { return 0x3f08c5392f756cd; } + +/* { dg-final { scan-assembler-times {\n\tllihf\t} 1 { target lp64 } } } */ +/* { dg-final { scan-assembler-times {\n\toilf\t} 1 { target lp64 } } } */ diff --git a/gcc/testsuite/gcc.target/s390/load-imm64-2.c b/gcc/testsuite/gcc.target/s390/load-imm64-2.c new file mode 100644 index 00000000000..43c00cdca3a --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/load-imm64-2.c @@ -0,0 +1,10 @@ +/* Test that large 64-bit constants are loaded with llihf + oilf when lgrl is + available. */ + +/* { dg-do compile } */ +/* { dg-options "-O3 -march=z10" } */ + +unsigned long magic (void) { return 0x3f08c5392f756cd; } + +/* { dg-final { scan-assembler-times {\n\tllihf\t} 1 { target lp64 } } } */ +/* { dg-final { scan-assembler-times {\n\toilf\t} 1 { target lp64 } } } */