Message ID | 5A7C847E.1020704@foss.arm.com |
---|---|
State | New |
Headers | show |
Series | [AArch64,1/3] PR target/84164: Simplify subreg + redundant AND-immediate (was: PR target/84164: Relax predicate in *aarch64_<optab>_reg_di3_mask2) | expand |
Thanks for doing this. Kyrill Tkachov <kyrylo.tkachov@foss.arm.com> writes: > diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c > index 2e7aa5c12952ab1a9b49b5adaf23710327e577d3..af06d7502cebac03cefc689b2646874b8397e767 100644 > --- a/gcc/simplify-rtx.c > +++ b/gcc/simplify-rtx.c > @@ -6474,6 +6474,18 @@ simplify_subreg (machine_mode outermode, rtx op, > return NULL_RTX; > } > > + /* Simplify (subreg:QI (and:SI (reg:SI) (const_int 0xffff)) 0) > + into (subreg:QI (reg:SI) 0). */ > + scalar_int_mode int_outermode, int_innermode; > + if (!paradoxical_subreg_p (outermode, innermode) > + && is_a <scalar_int_mode> (outermode, &int_outermode) > + && is_a <scalar_int_mode> (innermode, &int_innermode) > + && GET_CODE (op) == AND && CONST_INT_P (XEXP (op, 1)) > + && known_eq (subreg_lowpart_offset (outermode, innermode), byte) > + && (~INTVAL (XEXP (op, 1)) & GET_MODE_MASK (int_outermode)) == 0 > + && validate_subreg (outermode, innermode, XEXP (op, 0), byte)) > + return gen_rtx_SUBREG (outermode, XEXP (op, 0), byte); > + > /* A SUBREG resulting from a zero extension may fold to zero if > it extracts higher bits that the ZERO_EXTEND's source bits. */ > if (GET_CODE (op) == ZERO_EXTEND && SCALAR_INT_MODE_P (innermode)) I think it'd be better to do this in simplify_truncation (shared by the subreg code and the TRUNCATE code). The return would then be simplify_gen_unary (TRUNCATE, ...), which will become a subreg if TRULY_NOOP_TRUNCATION. Thanks, A different Richard
Hi Richard, On 08/02/18 20:29, Richard Sandiford wrote: > Thanks for doing this. > > Kyrill Tkachov <kyrylo.tkachov@foss.arm.com> writes: >> diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c >> index 2e7aa5c12952ab1a9b49b5adaf23710327e577d3..af06d7502cebac03cefc689b2646874b8397e767 100644 >> --- a/gcc/simplify-rtx.c >> +++ b/gcc/simplify-rtx.c >> @@ -6474,6 +6474,18 @@ simplify_subreg (machine_mode outermode, rtx op, >> return NULL_RTX; >> } >> >> + /* Simplify (subreg:QI (and:SI (reg:SI) (const_int 0xffff)) 0) >> + into (subreg:QI (reg:SI) 0). */ >> + scalar_int_mode int_outermode, int_innermode; >> + if (!paradoxical_subreg_p (outermode, innermode) >> + && is_a <scalar_int_mode> (outermode, &int_outermode) >> + && is_a <scalar_int_mode> (innermode, &int_innermode) >> + && GET_CODE (op) == AND && CONST_INT_P (XEXP (op, 1)) >> + && known_eq (subreg_lowpart_offset (outermode, innermode), byte) >> + && (~INTVAL (XEXP (op, 1)) & GET_MODE_MASK (int_outermode)) == 0 >> + && validate_subreg (outermode, innermode, XEXP (op, 0), byte)) >> + return gen_rtx_SUBREG (outermode, XEXP (op, 0), byte); >> + >> /* A SUBREG resulting from a zero extension may fold to zero if >> it extracts higher bits that the ZERO_EXTEND's source bits. */ >> if (GET_CODE (op) == ZERO_EXTEND && SCALAR_INT_MODE_P (innermode)) > I think it'd be better to do this in simplify_truncation (shared > by the subreg code and the TRUNCATE code). The return would then > be simplify_gen_unary (TRUNCATE, ...), which will become a subreg > if TRULY_NOOP_TRUNCATION. Thanks, that does look cleaner. Bootstrapped and tested on arm-none-linux-gnueabihf, aarch64-none-linux-gnu and x86_64-unknown-linux-gnu. The other two patches are still needed to address the fallout. Is this ok? Thanks, Kyrill 2018-02-12 Kyrylo Tkachov <kyrylo.tkachov@arm.com> PR target/84164 * simplify-rtx.c (simplify_truncation): Simplify truncation of masking operation. * config/aarch64/aarch64.md (*aarch64_reg_<mode>3_neg_mask2): Use simplify_gen_unary creating a SUBREG. (*aarch64_reg_<mode>3_minus_mask): Likewise. (*aarch64_<optab>_reg_di3_mask2): Use const_int_operand predicate for operand 3. 2018-02-12 Kyrylo Tkachov <kyrylo.tkachov@arm.com> PR target/84164 * gcc.c-torture/compile/pr84164.c: New test. commit 3bc951e94bec9395a732b35038dc0abf8785ba7d Author: Kyrylo Tkachov <kyrylo.tkachov@arm.com> Date: Thu Feb 1 13:57:46 2018 +0000 [AArch64] PR target/84164: Simplify subreg + redundant AND-immediate diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 0d13c35..69ff5ca 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -4278,8 +4278,10 @@ (define_insn_and_split "*aarch64_reg_<mode>3_neg_mask2" emit_insn (gen_negsi2 (tmp, operands[2])); rtx and_op = gen_rtx_AND (SImode, tmp, operands[3]); - rtx subreg_tmp = gen_rtx_SUBREG (GET_MODE (operands[4]), and_op, - SUBREG_BYTE (operands[4])); + rtx subreg_tmp = simplify_gen_unary (TRUNCATE, GET_MODE (operands[4]), + and_op, SImode); + + gcc_assert (subreg_tmp); emit_insn (gen_<optab><mode>3 (operands[0], operands[1], subreg_tmp)); DONE; } @@ -4305,9 +4307,10 @@ (define_insn_and_split "*aarch64_reg_<mode>3_minus_mask" emit_insn (gen_negsi2 (tmp, operands[3])); rtx and_op = gen_rtx_AND (SImode, tmp, operands[4]); - rtx subreg_tmp = gen_rtx_SUBREG (GET_MODE (operands[5]), and_op, - SUBREG_BYTE (operands[5])); + rtx subreg_tmp = simplify_gen_unary (TRUNCATE, GET_MODE (operands[5]), + and_op, SImode); + gcc_assert (subreg_tmp); emit_insn (gen_ashl<mode>3 (operands[0], operands[1], subreg_tmp)); DONE; } @@ -4318,9 +4321,9 @@ (define_insn "*aarch64_<optab>_reg_di3_mask2" (SHIFT:DI (match_operand:DI 1 "register_operand" "r") (match_operator 4 "subreg_lowpart_operator" - [(and:SI (match_operand:SI 2 "register_operand" "r") - (match_operand 3 "aarch64_shift_imm_di" "Usd"))])))] - "((~INTVAL (operands[3]) & (GET_MODE_BITSIZE (DImode)-1)) == 0)" + [(and:SI (match_operand:SI 2 "register_operand" "r") + (match_operand 3 "const_int_operand" "n"))])))] + "((~INTVAL (operands[3]) & (GET_MODE_BITSIZE (DImode) - 1)) == 0)" { rtx xop[3]; xop[0] = operands[0]; diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c index 2e7aa5c..1ccfce8 100644 --- a/gcc/simplify-rtx.c +++ b/gcc/simplify-rtx.c @@ -848,6 +848,16 @@ simplify_truncation (machine_mode mode, rtx op, return simplify_gen_subreg (int_mode, SUBREG_REG (op), subreg_mode, 0); } + /* Simplify (truncate:QI (and:SI (reg:SI) (const_int 0xffff)) 0) + into (truncate:QI (reg:SI) 0). */ + + scalar_int_mode int_outermode, int_innermode; + if (is_a <scalar_int_mode> (mode, &int_outermode) + && is_a <scalar_int_mode> (op_mode, &int_innermode) + && GET_CODE (op) == AND && CONST_INT_P (XEXP (op, 1)) + && (~INTVAL (XEXP (op, 1)) & GET_MODE_MASK (int_outermode)) == 0) + return simplify_gen_unary (TRUNCATE, mode, XEXP (op, 0), op_mode); + /* (truncate:A (truncate:B X)) is (truncate:A X). */ if (GET_CODE (op) == TRUNCATE) return simplify_gen_unary (TRUNCATE, mode, XEXP (op, 0), diff --git a/gcc/testsuite/gcc.c-torture/compile/pr84164.c b/gcc/testsuite/gcc.c-torture/compile/pr84164.c new file mode 100644 index 0000000..d663fe5 --- /dev/null +++ b/gcc/testsuite/gcc.c-torture/compile/pr84164.c @@ -0,0 +1,17 @@ +/* PR target/84164. */ + +int b, d; +unsigned long c; + +static inline __attribute__ ((always_inline)) void +bar (int e) +{ + e += __builtin_sub_overflow_p (b, d, c); + c = c << ((short) ~e & 3) | c >> (-((short) ~e & 3) & 63); +} + +int +foo (void) +{ + bar (~1); +}
Ping. https://gcc.gnu.org/ml/gcc-patches/2018-02/msg00649.html CC'ing Eric and Jeff as the patch contains a simplify-rtx.c component that I'll need midend approval on. Thanks everyone for your comments so far. Kyrill On 12/02/18 15:18, Kyrill Tkachov wrote: > Hi Richard, > > On 08/02/18 20:29, Richard Sandiford wrote: >> Thanks for doing this. >> >> Kyrill Tkachov <kyrylo.tkachov@foss.arm.com> writes: >>> diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c >>> index 2e7aa5c12952ab1a9b49b5adaf23710327e577d3..af06d7502cebac03cefc689b2646874b8397e767 100644 >>> --- a/gcc/simplify-rtx.c >>> +++ b/gcc/simplify-rtx.c >>> @@ -6474,6 +6474,18 @@ simplify_subreg (machine_mode outermode, rtx op, >>> return NULL_RTX; >>> } >>> + /* Simplify (subreg:QI (and:SI (reg:SI) (const_int 0xffff)) 0) >>> + into (subreg:QI (reg:SI) 0). */ >>> + scalar_int_mode int_outermode, int_innermode; >>> + if (!paradoxical_subreg_p (outermode, innermode) >>> + && is_a <scalar_int_mode> (outermode, &int_outermode) >>> + && is_a <scalar_int_mode> (innermode, &int_innermode) >>> + && GET_CODE (op) == AND && CONST_INT_P (XEXP (op, 1)) >>> + && known_eq (subreg_lowpart_offset (outermode, innermode), byte) >>> + && (~INTVAL (XEXP (op, 1)) & GET_MODE_MASK (int_outermode)) == 0 >>> + && validate_subreg (outermode, innermode, XEXP (op, 0), byte)) >>> + return gen_rtx_SUBREG (outermode, XEXP (op, 0), byte); >>> + >>> /* A SUBREG resulting from a zero extension may fold to zero if >>> it extracts higher bits that the ZERO_EXTEND's source bits. */ >>> if (GET_CODE (op) == ZERO_EXTEND && SCALAR_INT_MODE_P (innermode)) >> I think it'd be better to do this in simplify_truncation (shared >> by the subreg code and the TRUNCATE code). The return would then >> be simplify_gen_unary (TRUNCATE, ...), which will become a subreg >> if TRULY_NOOP_TRUNCATION. > > Thanks, that does look cleaner. > Bootstrapped and tested on arm-none-linux-gnueabihf, aarch64-none-linux-gnu and x86_64-unknown-linux-gnu. > The other two patches are still needed to address the fallout. > > Is this ok? > > Thanks, > Kyrill > > 2018-02-12 Kyrylo Tkachov <kyrylo.tkachov@arm.com> > > PR target/84164 > * simplify-rtx.c (simplify_truncation): Simplify truncation of masking > operation. > * config/aarch64/aarch64.md (*aarch64_reg_<mode>3_neg_mask2): > Use simplify_gen_unary creating a SUBREG. > (*aarch64_reg_<mode>3_minus_mask): Likewise. > (*aarch64_<optab>_reg_di3_mask2): Use const_int_operand predicate > for operand 3. > > 2018-02-12 Kyrylo Tkachov <kyrylo.tkachov@arm.com> > > PR target/84164 > * gcc.c-torture/compile/pr84164.c: New test.
Ping. Thanks, Kyrill On 19/02/18 11:35, Kyrill Tkachov wrote: > Ping. > > https://gcc.gnu.org/ml/gcc-patches/2018-02/msg00649.html > > CC'ing Eric and Jeff as the patch contains a simplify-rtx.c component that I'll need midend approval on. > > Thanks everyone for your comments so far. > Kyrill > > On 12/02/18 15:18, Kyrill Tkachov wrote: >> Hi Richard, >> >> On 08/02/18 20:29, Richard Sandiford wrote: >>> Thanks for doing this. >>> >>> Kyrill Tkachov <kyrylo.tkachov@foss.arm.com> writes: >>>> diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c >>>> index 2e7aa5c12952ab1a9b49b5adaf23710327e577d3..af06d7502cebac03cefc689b2646874b8397e767 100644 >>>> --- a/gcc/simplify-rtx.c >>>> +++ b/gcc/simplify-rtx.c >>>> @@ -6474,6 +6474,18 @@ simplify_subreg (machine_mode outermode, rtx op, >>>> return NULL_RTX; >>>> } >>>> + /* Simplify (subreg:QI (and:SI (reg:SI) (const_int 0xffff)) 0) >>>> + into (subreg:QI (reg:SI) 0). */ >>>> + scalar_int_mode int_outermode, int_innermode; >>>> + if (!paradoxical_subreg_p (outermode, innermode) >>>> + && is_a <scalar_int_mode> (outermode, &int_outermode) >>>> + && is_a <scalar_int_mode> (innermode, &int_innermode) >>>> + && GET_CODE (op) == AND && CONST_INT_P (XEXP (op, 1)) >>>> + && known_eq (subreg_lowpart_offset (outermode, innermode), byte) >>>> + && (~INTVAL (XEXP (op, 1)) & GET_MODE_MASK (int_outermode)) == 0 >>>> + && validate_subreg (outermode, innermode, XEXP (op, 0), byte)) >>>> + return gen_rtx_SUBREG (outermode, XEXP (op, 0), byte); >>>> + >>>> /* A SUBREG resulting from a zero extension may fold to zero if >>>> it extracts higher bits that the ZERO_EXTEND's source bits. */ >>>> if (GET_CODE (op) == ZERO_EXTEND && SCALAR_INT_MODE_P (innermode)) >>> I think it'd be better to do this in simplify_truncation (shared >>> by the subreg code and the TRUNCATE code). The return would then >>> be simplify_gen_unary (TRUNCATE, ...), which will become a subreg >>> if TRULY_NOOP_TRUNCATION. >> >> Thanks, that does look cleaner. >> Bootstrapped and tested on arm-none-linux-gnueabihf, aarch64-none-linux-gnu and x86_64-unknown-linux-gnu. >> The other two patches are still needed to address the fallout. >> >> Is this ok? >> >> Thanks, >> Kyrill >> >> 2018-02-12 Kyrylo Tkachov <kyrylo.tkachov@arm.com> >> >> PR target/84164 >> * simplify-rtx.c (simplify_truncation): Simplify truncation of masking >> operation. >> * config/aarch64/aarch64.md (*aarch64_reg_<mode>3_neg_mask2): >> Use simplify_gen_unary creating a SUBREG. >> (*aarch64_reg_<mode>3_minus_mask): Likewise. >> (*aarch64_<optab>_reg_di3_mask2): Use const_int_operand predicate >> for operand 3. >> >> 2018-02-12 Kyrylo Tkachov <kyrylo.tkachov@arm.com> >> >> PR target/84164 >> * gcc.c-torture/compile/pr84164.c: New test. >
On 03/01/2018 08:19 AM, Kyrill Tkachov wrote: > Ping. > > Thanks, > Kyrill > > On 19/02/18 11:35, Kyrill Tkachov wrote: >> Ping. >> >> https://gcc.gnu.org/ml/gcc-patches/2018-02/msg00649.html >> >> CC'ing Eric and Jeff as the patch contains a simplify-rtx.c component >> that I'll need midend approval on. >> >> Thanks everyone for your comments so far. >> Kyrill >> >> On 12/02/18 15:18, Kyrill Tkachov wrote: >>> Hi Richard, >>> >>> On 08/02/18 20:29, Richard Sandiford wrote: >>>> Thanks for doing this. >>>> >>>> Kyrill Tkachov <kyrylo.tkachov@foss.arm.com> writes: >>>>> diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c >>>>> index >>>>> 2e7aa5c12952ab1a9b49b5adaf23710327e577d3..af06d7502cebac03cefc689b2646874b8397e767 >>>>> 100644 >>>>> --- a/gcc/simplify-rtx.c >>>>> +++ b/gcc/simplify-rtx.c >>>>> @@ -6474,6 +6474,18 @@ simplify_subreg (machine_mode outermode, rtx >>>>> op, >>>>> return NULL_RTX; >>>>> } >>>>> + /* Simplify (subreg:QI (and:SI (reg:SI) (const_int 0xffff)) 0) >>>>> + into (subreg:QI (reg:SI) 0). */ >>>>> + scalar_int_mode int_outermode, int_innermode; >>>>> + if (!paradoxical_subreg_p (outermode, innermode) >>>>> + && is_a <scalar_int_mode> (outermode, &int_outermode) >>>>> + && is_a <scalar_int_mode> (innermode, &int_innermode) >>>>> + && GET_CODE (op) == AND && CONST_INT_P (XEXP (op, 1)) >>>>> + && known_eq (subreg_lowpart_offset (outermode, innermode), >>>>> byte) >>>>> + && (~INTVAL (XEXP (op, 1)) & GET_MODE_MASK (int_outermode)) >>>>> == 0 >>>>> + && validate_subreg (outermode, innermode, XEXP (op, 0), byte)) >>>>> + return gen_rtx_SUBREG (outermode, XEXP (op, 0), byte); >>>>> + >>>>> /* A SUBREG resulting from a zero extension may fold to zero if >>>>> it extracts higher bits that the ZERO_EXTEND's source bits. */ >>>>> if (GET_CODE (op) == ZERO_EXTEND && SCALAR_INT_MODE_P (innermode)) >>>> I think it'd be better to do this in simplify_truncation (shared >>>> by the subreg code and the TRUNCATE code). The return would then >>>> be simplify_gen_unary (TRUNCATE, ...), which will become a subreg >>>> if TRULY_NOOP_TRUNCATION. >>> >>> Thanks, that does look cleaner. >>> Bootstrapped and tested on arm-none-linux-gnueabihf, >>> aarch64-none-linux-gnu and x86_64-unknown-linux-gnu. >>> The other two patches are still needed to address the fallout. >>> >>> Is this ok? >>> >>> Thanks, >>> Kyrill >>> >>> 2018-02-12 Kyrylo Tkachov <kyrylo.tkachov@arm.com> >>> >>> PR target/84164 >>> * simplify-rtx.c (simplify_truncation): Simplify truncation of >>> masking >>> operation. >>> * config/aarch64/aarch64.md (*aarch64_reg_<mode>3_neg_mask2): >>> Use simplify_gen_unary creating a SUBREG. >>> (*aarch64_reg_<mode>3_minus_mask): Likewise. >>> (*aarch64_<optab>_reg_di3_mask2): Use const_int_operand predicate >>> for operand 3. >>> >>> 2018-02-12 Kyrylo Tkachov <kyrylo.tkachov@arm.com> >>> >>> PR target/84164 >>> * gcc.c-torture/compile/pr84164.c: New test. Sorry. I suspect I dropped this from my inbox when it had the AArch64 marker -- I didn't realize it had a target independent component. The simplify-rtx bits are fine. The version in simplify_truncation is much better than the original in simplify_subreg (which I think needed to verify that you were looking at the lowpart before optimizing). jeff >> >
On 02/03/18 17:34, Jeff Law wrote: > On 03/01/2018 08:19 AM, Kyrill Tkachov wrote: >> Ping. >> >> Thanks, >> Kyrill >> >> On 19/02/18 11:35, Kyrill Tkachov wrote: >>> Ping. >>> >>> https://gcc.gnu.org/ml/gcc-patches/2018-02/msg00649.html >>> >>> CC'ing Eric and Jeff as the patch contains a simplify-rtx.c component >>> that I'll need midend approval on. >>> >>> Thanks everyone for your comments so far. >>> Kyrill >>> >>> On 12/02/18 15:18, Kyrill Tkachov wrote: >>>> Hi Richard, >>>> >>>> On 08/02/18 20:29, Richard Sandiford wrote: >>>>> Thanks for doing this. >>>>> >>>>> Kyrill Tkachov <kyrylo.tkachov@foss.arm.com> writes: >>>>>> diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c >>>>>> index >>>>>> 2e7aa5c12952ab1a9b49b5adaf23710327e577d3..af06d7502cebac03cefc689b2646874b8397e767 >>>>>> 100644 >>>>>> --- a/gcc/simplify-rtx.c >>>>>> +++ b/gcc/simplify-rtx.c >>>>>> @@ -6474,6 +6474,18 @@ simplify_subreg (machine_mode outermode, rtx >>>>>> op, >>>>>> return NULL_RTX; >>>>>> } >>>>>> + /* Simplify (subreg:QI (and:SI (reg:SI) (const_int 0xffff)) 0) >>>>>> + into (subreg:QI (reg:SI) 0). */ >>>>>> + scalar_int_mode int_outermode, int_innermode; >>>>>> + if (!paradoxical_subreg_p (outermode, innermode) >>>>>> + && is_a <scalar_int_mode> (outermode, &int_outermode) >>>>>> + && is_a <scalar_int_mode> (innermode, &int_innermode) >>>>>> + && GET_CODE (op) == AND && CONST_INT_P (XEXP (op, 1)) >>>>>> + && known_eq (subreg_lowpart_offset (outermode, innermode), >>>>>> byte) >>>>>> + && (~INTVAL (XEXP (op, 1)) & GET_MODE_MASK (int_outermode)) >>>>>> == 0 >>>>>> + && validate_subreg (outermode, innermode, XEXP (op, 0), byte)) >>>>>> + return gen_rtx_SUBREG (outermode, XEXP (op, 0), byte); >>>>>> + >>>>>> /* A SUBREG resulting from a zero extension may fold to zero if >>>>>> it extracts higher bits that the ZERO_EXTEND's source bits. */ >>>>>> if (GET_CODE (op) == ZERO_EXTEND && SCALAR_INT_MODE_P (innermode)) >>>>> I think it'd be better to do this in simplify_truncation (shared >>>>> by the subreg code and the TRUNCATE code). The return would then >>>>> be simplify_gen_unary (TRUNCATE, ...), which will become a subreg >>>>> if TRULY_NOOP_TRUNCATION. >>>> Thanks, that does look cleaner. >>>> Bootstrapped and tested on arm-none-linux-gnueabihf, >>>> aarch64-none-linux-gnu and x86_64-unknown-linux-gnu. >>>> The other two patches are still needed to address the fallout. >>>> >>>> Is this ok? >>>> >>>> Thanks, >>>> Kyrill >>>> >>>> 2018-02-12 Kyrylo Tkachov <kyrylo.tkachov@arm.com> >>>> >>>> PR target/84164 >>>> * simplify-rtx.c (simplify_truncation): Simplify truncation of >>>> masking >>>> operation. >>>> * config/aarch64/aarch64.md (*aarch64_reg_<mode>3_neg_mask2): >>>> Use simplify_gen_unary creating a SUBREG. >>>> (*aarch64_reg_<mode>3_minus_mask): Likewise. >>>> (*aarch64_<optab>_reg_di3_mask2): Use const_int_operand predicate >>>> for operand 3. >>>> >>>> 2018-02-12 Kyrylo Tkachov <kyrylo.tkachov@arm.com> >>>> >>>> PR target/84164 >>>> * gcc.c-torture/compile/pr84164.c: New test. > Sorry. I suspect I dropped this from my inbox when it had the AArch64 > marker -- I didn't realize it had a target independent component. > > The simplify-rtx bits are fine. The version in simplify_truncation is > much better than the original in simplify_subreg (which I think needed > to verify that you were looking at the lowpart before optimizing). Thanks Jeff, I'd like to ask for approval for the aarch64 parts as well from the maintainers. Kyrill > jeff
On Fri, Mar 02, 2018 at 10:34:22AM -0700, Jeff Law wrote: > >>> 2018-02-12 Kyrylo Tkachov <kyrylo.tkachov@arm.com> > >>> > >>> PR target/84164 > >>> * simplify-rtx.c (simplify_truncation): Simplify truncation of > >>> masking > >>> operation. > >>> * config/aarch64/aarch64.md (*aarch64_reg_<mode>3_neg_mask2): > >>> Use simplify_gen_unary creating a SUBREG. > >>> (*aarch64_reg_<mode>3_minus_mask): Likewise. > >>> (*aarch64_<optab>_reg_di3_mask2): Use const_int_operand predicate > >>> for operand 3. > >>> > >>> 2018-02-12 Kyrylo Tkachov <kyrylo.tkachov@arm.com> > >>> > >>> PR target/84164 > >>> * gcc.c-torture/compile/pr84164.c: New test. > Sorry. I suspect I dropped this from my inbox when it had the AArch64 > marker -- I didn't realize it had a target independent component. > > The simplify-rtx bits are fine. The version in simplify_truncation is > much better than the original in simplify_subreg (which I think needed > to verify that you were looking at the lowpart before optimizing). Isn't that a stage1 material though? I fear given the amount of changes that needed to be done for it on i386.md that similar amount of work would be needed on many other targets, especially if they have less extensive testsuite coverage it might take a while to discover it. Jakub
On 14/03/18 14:07, Jakub Jelinek wrote: > On Fri, Mar 02, 2018 at 10:34:22AM -0700, Jeff Law wrote: >>>>> 2018-02-12 Kyrylo Tkachov <kyrylo.tkachov@arm.com> >>>>> >>>>> PR target/84164 >>>>> * simplify-rtx.c (simplify_truncation): Simplify truncation of >>>>> masking >>>>> operation. >>>>> * config/aarch64/aarch64.md (*aarch64_reg_<mode>3_neg_mask2): >>>>> Use simplify_gen_unary creating a SUBREG. >>>>> (*aarch64_reg_<mode>3_minus_mask): Likewise. >>>>> (*aarch64_<optab>_reg_di3_mask2): Use const_int_operand predicate >>>>> for operand 3. >>>>> >>>>> 2018-02-12 Kyrylo Tkachov <kyrylo.tkachov@arm.com> >>>>> >>>>> PR target/84164 >>>>> * gcc.c-torture/compile/pr84164.c: New test. >> Sorry. I suspect I dropped this from my inbox when it had the AArch64 >> marker -- I didn't realize it had a target independent component. >> >> The simplify-rtx bits are fine. The version in simplify_truncation is >> much better than the original in simplify_subreg (which I think needed >> to verify that you were looking at the lowpart before optimizing). > Isn't that a stage1 material though? I fear given the amount of changes > that needed to be done for it on i386.md that similar amount of work would > be needed on many other targets, especially if they have less extensive > testsuite coverage it might take a while to discover it. Perhaps, in which case your patch would be safer, or my subset of that posted at: https://gcc.gnu.org/ml/gcc-patches/2018-02/msg00102.html Maybe that would be preferable and we revisit this simplification for GCC 9? Thanks, Kyrill > Jakub
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 0d13c356d2a9b86c701c15b818e95df1a00abfc5..d9b4a405c9a1e5c09278b2329e5d73f12b0b963d 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -4278,8 +4278,10 @@ (define_insn_and_split "*aarch64_reg_<mode>3_neg_mask2" emit_insn (gen_negsi2 (tmp, operands[2])); rtx and_op = gen_rtx_AND (SImode, tmp, operands[3]); - rtx subreg_tmp = gen_rtx_SUBREG (GET_MODE (operands[4]), and_op, - SUBREG_BYTE (operands[4])); + rtx subreg_tmp = simplify_gen_subreg (GET_MODE (operands[4]), and_op, + SImode, SUBREG_BYTE (operands[4])); + + gcc_assert (subreg_tmp); emit_insn (gen_<optab><mode>3 (operands[0], operands[1], subreg_tmp)); DONE; } @@ -4305,9 +4307,10 @@ (define_insn_and_split "*aarch64_reg_<mode>3_minus_mask" emit_insn (gen_negsi2 (tmp, operands[3])); rtx and_op = gen_rtx_AND (SImode, tmp, operands[4]); - rtx subreg_tmp = gen_rtx_SUBREG (GET_MODE (operands[5]), and_op, - SUBREG_BYTE (operands[5])); + rtx subreg_tmp = simplify_gen_subreg (GET_MODE (operands[5]), and_op, + SImode, SUBREG_BYTE (operands[5])); + gcc_assert (subreg_tmp); emit_insn (gen_ashl<mode>3 (operands[0], operands[1], subreg_tmp)); DONE; } @@ -4318,9 +4321,9 @@ (define_insn "*aarch64_<optab>_reg_di3_mask2" (SHIFT:DI (match_operand:DI 1 "register_operand" "r") (match_operator 4 "subreg_lowpart_operator" - [(and:SI (match_operand:SI 2 "register_operand" "r") - (match_operand 3 "aarch64_shift_imm_di" "Usd"))])))] - "((~INTVAL (operands[3]) & (GET_MODE_BITSIZE (DImode)-1)) == 0)" + [(and:SI (match_operand:SI 2 "register_operand" "r") + (match_operand 3 "const_int_operand" "n"))])))] + "((~INTVAL (operands[3]) & (GET_MODE_BITSIZE (DImode) - 1)) == 0)" { rtx xop[3]; xop[0] = operands[0]; diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c index 2e7aa5c12952ab1a9b49b5adaf23710327e577d3..af06d7502cebac03cefc689b2646874b8397e767 100644 --- a/gcc/simplify-rtx.c +++ b/gcc/simplify-rtx.c @@ -6474,6 +6474,18 @@ simplify_subreg (machine_mode outermode, rtx op, return NULL_RTX; } + /* Simplify (subreg:QI (and:SI (reg:SI) (const_int 0xffff)) 0) + into (subreg:QI (reg:SI) 0). */ + scalar_int_mode int_outermode, int_innermode; + if (!paradoxical_subreg_p (outermode, innermode) + && is_a <scalar_int_mode> (outermode, &int_outermode) + && is_a <scalar_int_mode> (innermode, &int_innermode) + && GET_CODE (op) == AND && CONST_INT_P (XEXP (op, 1)) + && known_eq (subreg_lowpart_offset (outermode, innermode), byte) + && (~INTVAL (XEXP (op, 1)) & GET_MODE_MASK (int_outermode)) == 0 + && validate_subreg (outermode, innermode, XEXP (op, 0), byte)) + return gen_rtx_SUBREG (outermode, XEXP (op, 0), byte); + /* A SUBREG resulting from a zero extension may fold to zero if it extracts higher bits that the ZERO_EXTEND's source bits. */ if (GET_CODE (op) == ZERO_EXTEND && SCALAR_INT_MODE_P (innermode)) @@ -6483,7 +6495,7 @@ simplify_subreg (machine_mode outermode, rtx op, return CONST0_RTX (outermode); } - scalar_int_mode int_outermode, int_innermode; + if (is_a <scalar_int_mode> (outermode, &int_outermode) && is_a <scalar_int_mode> (innermode, &int_innermode) && known_eq (byte, subreg_lowpart_offset (int_outermode, int_innermode))) diff --git a/gcc/testsuite/gcc.c-torture/compile/pr84164.c b/gcc/testsuite/gcc.c-torture/compile/pr84164.c new file mode 100644 index 0000000000000000000000000000000000000000..d663fe5d6649e495f3e956e6a3bc938236a4bf91 --- /dev/null +++ b/gcc/testsuite/gcc.c-torture/compile/pr84164.c @@ -0,0 +1,17 @@ +/* PR target/84164. */ + +int b, d; +unsigned long c; + +static inline __attribute__ ((always_inline)) void +bar (int e) +{ + e += __builtin_sub_overflow_p (b, d, c); + c = c << ((short) ~e & 3) | c >> (-((short) ~e & 3) & 63); +} + +int +foo (void) +{ + bar (~1); +}